From fe75f9cbe5fbee4eaed280609c316eb4d90e78ed Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 18:07:34 +0300 Subject: [PATCH 01/34] add logging implementation --- GANDLF/entrypoints/cli_tool.py | 3 ++- GANDLF/logging_config.yaml | 37 +++++++++++++++++++++++++ GANDLF/utils/gandlf_logger.py | 49 ++++++++++++++++++++++++++++++++++ MANIFEST.in | 1 + setup.py | 2 +- 5 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 GANDLF/logging_config.yaml create mode 100644 GANDLF/utils/gandlf_logger.py diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py index e6c559acc..275fb1270 100644 --- a/GANDLF/entrypoints/cli_tool.py +++ b/GANDLF/entrypoints/cli_tool.py @@ -3,7 +3,7 @@ import click from .subcommands import cli_subcommands from GANDLF.entrypoints import append_copyright_to_help - +from GANDLF.utils import gandlf_logger_setup from GANDLF import version @@ -22,6 +22,7 @@ def setup_logging(loglevel): @append_copyright_to_help def gandlf(ctx, loglevel): """GANDLF command-line tool.""" + logger = gandlf_logger_setup(__name__) ctx.ensure_object(dict) ctx.obj["LOGLEVEL"] = loglevel setup_logging(loglevel) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml new file mode 100644 index 000000000..942051fb0 --- /dev/null +++ b/GANDLF/logging_config.yaml @@ -0,0 +1,37 @@ +version: 1 +formatters: + detailed: + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +filters: + warnings_filter: + (): logging.Filter + name: "py.warnings" + info_only_filter: + (): GANDLF.gandlf_logger.InfoOnlyFilter +handlers: + stdoutHandler: # only display info level + class: logging.StreamHandler + level: INFO + formatter: detailed + filters: [info_only_filter] + stream: ext://sys.stdout + stderrHandler: # display warning and above messages + class: logging.StreamHandler + level: WARNING + formatter: detailed + stream: ext://sys.stderr + rotatingFileHandler: + class: logging.handlers.RotatingFileHandler + level: DEBUG + formatter: detailed + filename: tmp/gandlf/gandlf.log + maxBytes: 51200 + backupCount: 2 +loggers: # you can add your customized logger + debug_logger: + level: DEBUG + handlers: [stdoutHandler, rotatingFileHandler, stderrHandler] + propagate: no +root: + level: DEBUG + handlers: [stdoutHandler, rotatingFileHandler, stderrHandler] diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py new file mode 100644 index 000000000..1f50a0ab0 --- /dev/null +++ b/GANDLF/utils/gandlf_logger.py @@ -0,0 +1,49 @@ +import logging +from logging import config +import yaml +from pathlib import Path +from importlib import resources + + +def gandlf_logger_setup(logger_name,config_path = "logging_config.yaml") -> logging.Logger: + """ + It sets up the logger. Read from logging_config. 
+ Args: + logger_name (str): logger name, the name should be the same in the logging_config + config_path (str): file path for the configuration + Returns: + logging.Logger + """ + + # if config_path == None: + # config_dir = Path.cwd() + # config_path = Path.joinpath(config_dir, "GANDLF/config_gandlf_logger.yaml") + + # create dir for storing the messages + current_dir = Path.cwd() + directory = Path.joinpath(current_dir, "tmp/gandlf") + directory.mkdir(parents=True, exist_ok=True) + + with resources.open_text("GANDLF", config_path) as file: + config_dict = yaml.safe_load(file) + logging.config.dictConfig(config_dict) + + logging.captureWarnings(True) + + return logging.getLogger(logger_name) + + +class InfoOnlyFilter(logging.Filter): + """ + Display only INFO messages. + """ + + def filter(self, record): + """ + Determines if the specified record is to be logged. + Args: + record (logging.LogRecord): The log record to be evaluated. + Returns: + bool: True if the log record should be processed, False otherwise. + """ + return record.levelno == logging.INFO \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in index 81a7edc04..7c33dccc4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,3 +4,4 @@ exclude *.toml include setup.py include .dockerignore include Dockerfile-* +include logging_config.yml diff --git a/setup.py b/setup.py index 82c40cab5..a5f9d5c09 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ ] # Any extra files should be located at `GANDLF` module folder (not in repo root) -extra_files = [] +extra_files = ["logging_config.yml"] toplevel_package_excludes = ["testing*"] # specifying version for `black` separately because it is also used to [check for lint](https://github.com/mlcommons/GaNDLF/blob/master/.github/workflows/black.yml) From 08550d642b5dc22e0d91284b3c67bfca1a6df97c Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 18:15:31 +0300 Subject: [PATCH 02/34] update utils.__init__ --- GANDLF/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/utils/__init__.py b/GANDLF/utils/__init__.py index 66d830d3d..2466b1435 100644 --- a/GANDLF/utils/__init__.py +++ b/GANDLF/utils/__init__.py @@ -68,3 +68,4 @@ ) from .data_splitter import split_data +from .gandlf_logger import gandlf_logger_setup, InfoOnlyFilter From ea42f9572432dea5ba35a024f9c1f840f554e783 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 18:22:54 +0300 Subject: [PATCH 03/34] change logging_config --- GANDLF/logging_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 942051fb0..08f1b6cb0 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -7,7 +7,7 @@ filters: (): logging.Filter name: "py.warnings" info_only_filter: - (): GANDLF.gandlf_logger.InfoOnlyFilter + (): gandlf_logger.InfoOnlyFilter handlers: stdoutHandler: # only display info level class: logging.StreamHandler From f54641cfea9084bf047c8a67895bdd5f52ebfc4a Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 18:23:40 +0300 Subject: [PATCH 04/34] change logging_config --- GANDLF/logging_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 08f1b6cb0..ab0c9da3f 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -7,7 +7,7 @@ filters: (): logging.Filter name: "py.warnings" info_only_filter: - (): gandlf_logger.InfoOnlyFilter + (): 
utils.gandlf_logger.InfoOnlyFilter handlers: stdoutHandler: # only display info level class: logging.StreamHandler From 10dee45b9e813c0574df838889661ac5aa13044c Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 22:52:49 +0300 Subject: [PATCH 05/34] change logging_config --- GANDLF/logging_config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index ab0c9da3f..627bb1b58 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -7,7 +7,7 @@ filters: (): logging.Filter name: "py.warnings" info_only_filter: - (): utils.gandlf_logger.InfoOnlyFilter + (): GANDLF.utils.gandlf_logger.InfoOnlyFilter handlers: stdoutHandler: # only display info level class: logging.StreamHandler From e0aa707ac455aa170ca3a6b2ff8c64adce3d1851 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Tue, 2 Jul 2024 23:04:49 +0300 Subject: [PATCH 06/34] blacked gandlf_logger --- GANDLF/utils/gandlf_logger.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 1f50a0ab0..f272d2612 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -5,7 +5,9 @@ from importlib import resources -def gandlf_logger_setup(logger_name,config_path = "logging_config.yaml") -> logging.Logger: +def gandlf_logger_setup( + logger_name, config_path="logging_config.yaml" +) -> logging.Logger: """ It sets up the logger. Read from logging_config. Args: @@ -15,10 +17,6 @@ def gandlf_logger_setup(logger_name,config_path = "logging_config.yaml") -> logg logging.Logger """ - # if config_path == None: - # config_dir = Path.cwd() - # config_path = Path.joinpath(config_dir, "GANDLF/config_gandlf_logger.yaml") - # create dir for storing the messages current_dir = Path.cwd() directory = Path.joinpath(current_dir, "tmp/gandlf") @@ -46,4 +44,4 @@ def filter(self, record): Returns: bool: True if the log record should be processed, False otherwise. 
""" - return record.levelno == logging.INFO \ No newline at end of file + return record.levelno == logging.INFO From d5493f12180587dd46f8c4e54f16346b33b71b67 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 01:26:57 +0300 Subject: [PATCH 07/34] add gandlf_setup in the entrypoints --- GANDLF/compute/forward_pass.py | 5 +++-- GANDLF/entrypoints/anonymizer.py | 2 ++ GANDLF/entrypoints/cli_tool.py | 4 ++-- GANDLF/entrypoints/collect_stats.py | 2 ++ GANDLF/entrypoints/config_generator.py | 2 ++ GANDLF/entrypoints/construct_csv.py | 2 ++ GANDLF/entrypoints/debug_info.py | 3 ++- GANDLF/entrypoints/deploy.py | 2 ++ GANDLF/entrypoints/generate_metrics.py | 2 ++ GANDLF/entrypoints/optimize_model.py | 2 ++ GANDLF/entrypoints/patch_miner.py | 2 ++ GANDLF/entrypoints/preprocess.py | 2 ++ GANDLF/entrypoints/recover_config.py | 2 ++ GANDLF/entrypoints/run.py | 2 ++ GANDLF/entrypoints/split_csv.py | 2 ++ GANDLF/entrypoints/verify_install.py | 2 ++ GANDLF/logging_config.yaml | 10 ++++++---- GANDLF/utils/gandlf_logger.py | 5 +++-- 18 files changed, 42 insertions(+), 11 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 9da87b8ff..f010b0fff 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -1,7 +1,7 @@ import os import pathlib from typing import Optional, Tuple - +import logging import numpy as np import pandas as pd import SimpleITK as sitk @@ -22,6 +22,7 @@ ) from GANDLF.metrics import overall_stats from tqdm import tqdm +from GANDLF.utils import gandlf_logger_setup def validate_network( @@ -116,7 +117,7 @@ def validate_network( tqdm(valid_dataloader, desc="Looping over " + mode + " data") ): if params["verbose"]: - print("== Current subject:", subject["subject_id"], flush=True) + logging.debug(f'== Current subject: {subject["subject_id"]}') # ensure spacing is always present in params and is always subject-specific params["subject_spacing"] = None diff --git a/GANDLF/entrypoints/anonymizer.py b/GANDLF/entrypoints/anonymizer.py index 7bf8fdfad..e990f9d14 100644 --- a/GANDLF/entrypoints/anonymizer.py +++ b/GANDLF/entrypoints/anonymizer.py @@ -12,6 +12,7 @@ from GANDLF.anonymize import run_anonymizer from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _anonymize_images( @@ -77,6 +78,7 @@ def new_way(input_dir, config, modality, output_file): + "`gandlf_anonymizer` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Anonymize", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py index 275fb1270..7a61e58eb 100644 --- a/GANDLF/entrypoints/cli_tool.py +++ b/GANDLF/entrypoints/cli_tool.py @@ -22,10 +22,10 @@ def setup_logging(loglevel): @append_copyright_to_help def gandlf(ctx, loglevel): """GANDLF command-line tool.""" - logger = gandlf_logger_setup(__name__) ctx.ensure_object(dict) ctx.obj["LOGLEVEL"] = loglevel - setup_logging(loglevel) + # setup_logging(loglevel) + gandlf_logger_setup() # registers subcommands: `gandlf anonymizer`, `gandlf run`, etc. 
diff --git a/GANDLF/entrypoints/collect_stats.py b/GANDLF/entrypoints/collect_stats.py index ba21f0c16..81da499b9 100644 --- a/GANDLF/entrypoints/collect_stats.py +++ b/GANDLF/entrypoints/collect_stats.py @@ -14,6 +14,7 @@ from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils import gandlf_logger_setup def plot_all(df_training, df_validation, df_testing, output_plot_dir): @@ -205,6 +206,7 @@ def new_way(model_dir: str, output_dir: str): + "`gandlf_collectStats` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_CollectStats", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/config_generator.py b/GANDLF/entrypoints/config_generator.py index 51a7309d3..7be011790 100644 --- a/GANDLF/entrypoints/config_generator.py +++ b/GANDLF/entrypoints/config_generator.py @@ -4,6 +4,7 @@ from GANDLF.cli import config_generator, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _generate_config(config: str, strategy: str, output: str): @@ -46,6 +47,7 @@ def new_way(config, strategy, output): + "`gandlf_configGenerator` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_ConfigGenerator", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/construct_csv.py b/GANDLF/entrypoints/construct_csv.py index d56db9988..342825180 100644 --- a/GANDLF/entrypoints/construct_csv.py +++ b/GANDLF/entrypoints/construct_csv.py @@ -14,6 +14,7 @@ from GANDLF.utils import writeTrainingCSV from GANDLF.cli import copyrightMessage +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _construct_csv( @@ -119,6 +120,7 @@ def new_way( + "`gandlf_constructCSV` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_ConstructCSV", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/debug_info.py b/GANDLF/entrypoints/debug_info.py index 9890e2813..aab08f250 100644 --- a/GANDLF/entrypoints/debug_info.py +++ b/GANDLF/entrypoints/debug_info.py @@ -7,7 +7,7 @@ from GANDLF import __version__ from GANDLF.entrypoints import append_copyright_to_help from GANDLF.utils import get_git_hash - +from GANDLF.utils import gandlf_logger_setup def _debug_info(): print(f"GANDLF version: {__version__}") @@ -38,6 +38,7 @@ def new_way(): ) def old_way(): _debug_info() + gandlf_logger_setup() if __name__ == "__main__": diff --git a/GANDLF/entrypoints/deploy.py b/GANDLF/entrypoints/deploy.py index dfcd22d54..dba542ee1 100644 --- a/GANDLF/entrypoints/deploy.py +++ b/GANDLF/entrypoints/deploy.py @@ -16,6 +16,7 @@ copyrightMessage, ) from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _deploy( @@ -157,6 +158,7 @@ def new_way( + "`gandlf_deploy` script would be deprecated soon." 
) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Deploy", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/generate_metrics.py b/GANDLF/entrypoints/generate_metrics.py index 019da6433..a9792b396 100644 --- a/GANDLF/entrypoints/generate_metrics.py +++ b/GANDLF/entrypoints/generate_metrics.py @@ -11,6 +11,7 @@ from GANDLF.cli import copyrightMessage from GANDLF.cli.generate_metrics import generate_metrics_dict from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _generate_metrics(input_data: str, config: str, output_file: Optional[str]): @@ -63,6 +64,7 @@ def new_way(config: str, input_data: str, output_file: Optional[str], raw_input: + "`gandlf_generateMetrics` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Metrics", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/optimize_model.py b/GANDLF/entrypoints/optimize_model.py index 7cf5e9a7f..53cd0c53b 100644 --- a/GANDLF/entrypoints/optimize_model.py +++ b/GANDLF/entrypoints/optimize_model.py @@ -9,6 +9,7 @@ from GANDLF.cli import copyrightMessage, post_training_model_optimization from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _optimize_model(model: str, config: Optional[str]): @@ -47,6 +48,7 @@ def new_way(model: str, config: Optional[str]): + "`gandlf_optimizeModel` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_OptimizeModel", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/patch_miner.py b/GANDLF/entrypoints/patch_miner.py index 4398765b8..44e7f4239 100644 --- a/GANDLF/entrypoints/patch_miner.py +++ b/GANDLF/entrypoints/patch_miner.py @@ -10,6 +10,7 @@ from GANDLF.cli.patch_extraction import patch_extraction from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _mine_patches(input_path: str, output_dir: str, config: Optional[str]): @@ -55,6 +56,7 @@ def new_way(input_csv: str, output_dir: str, config: Optional[str]): + "`gandlf_patchMiner` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_PatchMiner", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/preprocess.py b/GANDLF/entrypoints/preprocess.py index f9bdaf006..eccb00fb4 100644 --- a/GANDLF/entrypoints/preprocess.py +++ b/GANDLF/entrypoints/preprocess.py @@ -8,6 +8,7 @@ from deprecated import deprecated from GANDLF.cli import preprocess_and_save, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _preprocess( @@ -110,6 +111,7 @@ def new_way( + "`gandlf_preprocess` script would be deprecated soon." 
) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Preprocess", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/recover_config.py b/GANDLF/entrypoints/recover_config.py index ad5967659..36bed0070 100644 --- a/GANDLF/entrypoints/recover_config.py +++ b/GANDLF/entrypoints/recover_config.py @@ -9,6 +9,7 @@ from GANDLF.cli import copyrightMessage, recover_config from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _recover_config(model_dir: Optional[str], mlcube: bool, output_file: str): @@ -63,6 +64,7 @@ def new_way(model_dir, mlcube, output_file): + "`gandlf_recoverConfig` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_RecoverConfig", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/run.py b/GANDLF/entrypoints/run.py index d696e14ee..5c11a7091 100644 --- a/GANDLF/entrypoints/run.py +++ b/GANDLF/entrypoints/run.py @@ -14,6 +14,7 @@ from GANDLF import version from GANDLF.cli import main_run, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _run( @@ -184,6 +185,7 @@ def new_way( + "`gandlf_run` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/split_csv.py b/GANDLF/entrypoints/split_csv.py index fe4519d01..eecda7c6c 100644 --- a/GANDLF/entrypoints/split_csv.py +++ b/GANDLF/entrypoints/split_csv.py @@ -12,6 +12,7 @@ from GANDLF.cli import copyrightMessage, split_data_and_save_csvs from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _split_csv(input_csv: str, output_dir: str, config_path: Optional[str]): @@ -64,6 +65,7 @@ def new_way(input_csv: str, output_dir: str, config: Optional[str]): + "`gandlf_splitCSV` script would be deprecated soon." ) def old_way(): + gandlf_logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_SplitCSV", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/verify_install.py b/GANDLF/entrypoints/verify_install.py index b891542a7..549ad2ad8 100644 --- a/GANDLF/entrypoints/verify_install.py +++ b/GANDLF/entrypoints/verify_install.py @@ -6,6 +6,7 @@ from deprecated import deprecated from GANDLF.entrypoints import append_copyright_to_help +from GANDLF.utils.gandlf_logger import gandlf_logger_setup def _verify_install(): @@ -42,6 +43,7 @@ def new_way(): + "`gandlf_verifyInstall` script would be deprecated soon." 
) def old_way(): + gandlf_logger_setup() argparse.ArgumentParser( prog="GANDLF_VerifyInstall", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 627bb1b58..ec305f19d 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -2,6 +2,8 @@ version: 1 formatters: detailed: format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + detailed1: + format: "%(asctime)s - %(name)s - %(levelname)s - %(pathname)s:%(lineno)d - %(message)s" filters: warnings_filter: (): logging.Filter @@ -12,20 +14,20 @@ handlers: stdoutHandler: # only display info level class: logging.StreamHandler level: INFO - formatter: detailed + formatter: detailed1 filters: [info_only_filter] stream: ext://sys.stdout stderrHandler: # display warning and above messages class: logging.StreamHandler level: WARNING - formatter: detailed + formatter: detailed1 stream: ext://sys.stderr rotatingFileHandler: class: logging.handlers.RotatingFileHandler level: DEBUG - formatter: detailed + formatter: detailed1 filename: tmp/gandlf/gandlf.log - maxBytes: 51200 + maxBytes: 10485760 backupCount: 2 loggers: # you can add your customized logger debug_logger: diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index f272d2612..75edc957d 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -6,7 +6,7 @@ def gandlf_logger_setup( - logger_name, config_path="logging_config.yaml" + config_path="logging_config.yaml" ) -> logging.Logger: """ It sets up the logger. Read from logging_config. @@ -22,13 +22,14 @@ def gandlf_logger_setup( directory = Path.joinpath(current_dir, "tmp/gandlf") directory.mkdir(parents=True, exist_ok=True) + with resources.open_text("GANDLF", config_path) as file: config_dict = yaml.safe_load(file) logging.config.dictConfig(config_dict) logging.captureWarnings(True) - return logging.getLogger(logger_name) + # return logging.getLogger(logger_name) class InfoOnlyFilter(logging.Filter): From 0d1ec65cb5083134679bb70dead4861bb26592d4 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 01:28:34 +0300 Subject: [PATCH 08/34] blacked gandlf_logger --- GANDLF/utils/gandlf_logger.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 75edc957d..17560353c 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -5,9 +5,7 @@ from importlib import resources -def gandlf_logger_setup( - config_path="logging_config.yaml" -) -> logging.Logger: +def gandlf_logger_setup(config_path="logging_config.yaml") -> logging.Logger: """ It sets up the logger. Read from logging_config. 
Args: @@ -22,7 +20,6 @@ def gandlf_logger_setup( directory = Path.joinpath(current_dir, "tmp/gandlf") directory.mkdir(parents=True, exist_ok=True) - with resources.open_text("GANDLF", config_path) as file: config_dict = yaml.safe_load(file) logging.config.dictConfig(config_dict) From 8ee6308b3e1e667b538a4d9b4473a38c695e2939 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 01:30:05 +0300 Subject: [PATCH 09/34] blacked some files --- GANDLF/entrypoints/debug_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/GANDLF/entrypoints/debug_info.py b/GANDLF/entrypoints/debug_info.py index aab08f250..e5f2d8a25 100644 --- a/GANDLF/entrypoints/debug_info.py +++ b/GANDLF/entrypoints/debug_info.py @@ -9,6 +9,7 @@ from GANDLF.utils import get_git_hash from GANDLF.utils import gandlf_logger_setup + def _debug_info(): print(f"GANDLF version: {__version__}") print(f"Git hash: {get_git_hash()}") From 04fee16a94561297f7e47d74c2acc444cf98ff47 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 01:42:32 +0300 Subject: [PATCH 10/34] blacked forward_pass --- GANDLF/compute/forward_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index f010b0fff..7eee2c190 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -117,7 +117,7 @@ def validate_network( tqdm(valid_dataloader, desc="Looping over " + mode + " data") ): if params["verbose"]: - logging.debug(f'== Current subject: {subject["subject_id"]}') + logging.debug(f'== Current subject: {subject["subject_id"]}') # ensure spacing is always present in params and is always subject-specific params["subject_spacing"] = None From b5203cf6b75b6e13279ba11cf1298a84cf151733 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 12:20:13 +0300 Subject: [PATCH 11/34] add logging testing --- .gitignore | 1 + testing/test_full.py | 5835 +++++++++++++++++++++--------------------- 2 files changed, 2928 insertions(+), 2908 deletions(-) diff --git a/.gitignore b/.gitignore index 5a329fd60..d40b4a5df 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ tutorials/classification_medmnist_notebook/output_stats tutorials/classification_medmnist_notebook/model tutorials/classification_medmnist_notebook/dataset/*.csv testing/test_deploy +tmp diff --git a/testing/test_full.py b/testing/test_full.py index 957ad52f2..06150bfb9 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3,6 +3,7 @@ import SimpleITK as sitk import numpy as np import pandas as pd +import logging from pydicom.data import get_testdata_file import cv2 @@ -242,2921 +243,2939 @@ def write_temp_config_path(parameters_to_write): # these are helper functions to be used in other tests -def test_train_segmentation_rad_2d(device): - print("03: Starting 2D Rad segmentation tests") - # read and parse csv - parameters = parseConfig( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = 3 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters["data_preprocessing"]["resize_image"] = [224, 224] - parameters = 
populate_header_in_parameters(parameters, parameters["headers"]) - # read and initialize parameters for specific data dimension - for model in all_models_segmentation: - if model == "imagenet_unet": - # imagenet_unet encoder needs to be toned down for small patch size - parameters["model"]["encoder_name"] = "mit_b0" - parameters["model"]["encoder_depth"] = 3 - parameters["model"]["decoder_channels"] = (64, 32, 16) - parameters["model"]["final_layer"] = random.choice( - ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] - ) - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - - if model == "dynunet": - # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py - parameters["model"]["kernel_size"] = (3, 3, 3, 1) - parameters["model"]["strides"] = (1, 1, 1, 1) - parameters["model"]["deep_supervision"] = False - - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_sdnet_rad_2d(device): - print("04: Starting 2D Rad segmentation tests") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - # patch_size is custom for sdnet - parameters["patch_size"] = [224, 224, 1] - parameters["batch_size"] = 2 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["num_channels"] = 1 - parameters["model"]["architecture"] = "sdnet" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - sanitize_outputDir() - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_rad_3d(device): - print("05: Starting 3D Rad segmentation tests") - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["class_list"] = [0, 1] - parameters["model"]["final_layer"] = "softmax" - parameters["model"]["amp"] = True - parameters["in_memory"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_segmentation: - if model == "imagenet_unet": - # imagenet_unet encoder needs to be toned down for small patch size - parameters["model"]["encoder_name"] = "mit_b0" - with pytest.raises(Exception) as exc_info: - _ = 
global_models_dict[model](parameters) - print("Exception raised:", exc_info.value) - parameters["model"]["encoder_name"] = "resnet34" - parameters["model"]["encoder_depth"] = 3 - parameters["model"]["decoder_channels"] = (64, 32, 16) - parameters["model"]["final_layer"] = random.choice( - ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] - ) - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - - if model == "dynunet": - # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py - parameters["model"]["kernel_size"] = (3, 3, 3, 1) - parameters["model"]["strides"] = (1, 1, 1, 1) - parameters["model"]["deep_supervision"] = False - - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_regression_rad_2d(device): - print("06: Starting 2D Rad regression tests") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_regression.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["amp"] = False - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_regression.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] - parameters["scaling_factor"] = 1 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_regression: - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_regression_rad_2d_imagenet(device): - print("07: Starting 2D Rad regression tests for imagenet models") - # read and initialize parameters for specific data dimension - print("Starting 2D Rad regression tests for imagenet models") - parameters = ConfigManager( - testingDir + "/config_regression.yaml", version_check_flag=False - ) - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["amp"] = False - parameters["model"]["print_summary"] = False - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_regression.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] - parameters["scaling_factor"] = 1 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_classification: - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = 1 - 
parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_regression_brainage_rad_2d(device): - print("08: Starting brain age tests") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_regression.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["amp"] = False - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_regression.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] - parameters["scaling_factor"] = 1 - parameters["model"]["architecture"] = "brain_age" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - # parameters_temp = copy.deepcopy(parameters) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - # file_config_temp = write_temp_config_path(parameters_temp) - model_path = os.path.join(outputDir, "brain_age_best.pth.tar") - config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization(model_path, config_path) - assert optimization_result == False, "Optimization should fail" - - sanitize_outputDir() - - print("passed") - - -def test_train_regression_rad_3d(device): - print("09: Starting 3D Rad regression tests") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_regression.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_regression.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_regression: - if "efficientnet" in model: - parameters["patch_size"] = [16, 16, 16] - else: - parameters["patch_size"] = patch_size["3D"] - - if model == "imagenet_unet": - parameters["model"]["depth"] = 2 - parameters["model"]["decoder_channels"] = [32, 16] - parameters["model"]["encoder_weights"] = "None" - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_classification_rad_2d(device): - print("10: Starting 2D Rad 
classification tests") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["track_memory_usage"] = True - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_regression: - if model == "imagenet_unet": - parameters["model"]["depth"] = 2 - parameters["model"]["decoder_channels"] = [32, 16] - parameters["model"]["encoder_weights"] = "None" - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - # ensure sigmoid and softmax activations are tested for imagenet models - for activation_type in ["sigmoid", "softmax"]: - parameters["model"]["architecture"] = "imagenet_vgg11" - parameters["model"]["final_layer"] = activation_type - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_classification_rad_3d(device): - print("11: Starting 3D Rad classification tests") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - # loop through selected models and train for single epoch - for model in all_models_regression: - if "efficientnet" in model: - parameters["patch_size"] = [16, 16, 16] - else: - parameters["patch_size"] = patch_size["3D"] - if model == "imagenet_unet": - parameters["model"]["encoder_name"] = "efficientnet-b0" - parameters["model"]["depth"] = 1 - parameters["model"]["decoder_channels"] = [64] - parameters["model"]["final_layer"] = random.choice( - ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] - ) - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - 
parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_resume_inference_classification_rad_3d(device): - print("12: Starting 3D Rad classification tests for resume and reset") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - model = all_models_regression[0] - parameters["model"]["architecture"] = model - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - ## testing resume with parameter updates - parameters["num_epochs"] = 2 - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - parameters["model"]["save_at_every_epoch"] = True - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=True, - reset=False, - ) - - ## testing resume without parameter updates - parameters["num_epochs"] = 1 - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=False, - ) - - parameters["output_dir"] = outputDir # this is in inference mode - InferenceManager( - dataframe=training_data, - modelDir=outputDir, - parameters=parameters, - device=device, - ) - sanitize_outputDir() - - print("passed") - - -def test_train_inference_optimize_classification_rad_3d(device): - print("13: Starting 3D Rad segmentation tests for optimization") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["model"]["architecture"] = all_models_regression[0] - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - # parameters_temp = copy.deepcopy(parameters) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - # file_config_temp = write_temp_config_path(parameters_temp) - model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") - config_path = os.path.join(outputDir, "parameters.pkl") - optimization_result = post_training_model_optimization(model_path, config_path) - 
assert optimization_result == True, "Optimization should pass" - - ## testing inference - for model_type in all_model_type: - parameters["model"]["type"] = model_type - parameters["output_dir"] = outputDir # this is in inference mode - InferenceManager( - dataframe=training_data, - modelDir=outputDir, - parameters=parameters, - device=device, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_inference_optimize_segmentation_rad_2d(device): - print("14: Starting 2D Rad segmentation tests for optimization") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["patch_size"] = patch_size["2D"] - parameters["modality"] = "rad" - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["save_output"] = True - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = ["dice"] - parameters["model"]["architecture"] = "resunet" - parameters["model"]["onnx_export"] = True - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - ## testing inference - for model_type in all_model_type: - parameters["model"]["type"] = model_type - parameters["output_dir"] = outputDir # this is in inference mode - InferenceManager( - dataframe=training_data, - modelDir=outputDir, - parameters=parameters, - device=device, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_inference_classification_with_logits_single_fold_rad_3d(device): - print("15: Starting 3D Rad classification tests for single fold logits inference") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["final_layer"] = "logits" - # loop through selected models and train for single epoch - model = all_models_regression[0] - parameters["model"]["architecture"] = model - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - ## add stratified splitting - parameters["nested_training"]["stratified"] = True - - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # duplicate the data to test stratified sampling - training_data_duplicate = training_data._append(training_data) - for _ in range(1): - training_data_duplicate = training_data_duplicate._append( - training_data_duplicate - ) - training_data_duplicate.reset_index(drop=True, inplace=True) - # ensure subjects are not duplicated - training_data_duplicate["SubjectID"] = training_data_duplicate.index - - # ensure every part of the code is tested - for folds in [2, 1, -5]: - ## add stratified folding information - parameters["nested_training"]["testing"] = folds - parameters["nested_training"]["validation"] = folds if folds != 1 
else -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data_duplicate, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - ## this is to test if inference can run without having ground truth column - training_data.drop("ValueToPredict", axis=1, inplace=True) - training_data.drop("Label", axis=1, inplace=True) - temp_infer_csv = os.path.join(outputDir, "temp_infer_csv.csv") - training_data.to_csv(temp_infer_csv, index=False) - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV(temp_infer_csv) - parameters["output_dir"] = outputDir # this is in inference mode - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["final_layer"] = "logits" - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - model = all_models_regression[0] - parameters["model"]["architecture"] = model - parameters["model"]["onnx_export"] = False - InferenceManager( - dataframe=training_data, - modelDir=outputDir, - parameters=parameters, - device=device, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): - print("16: Starting 3D Rad classification tests for multi-fold logits inference") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["final_layer"] = "logits" - # necessary for n-fold cross-validation inference - parameters["nested_training"]["validation"] = 2 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - model = all_models_regression[0] - parameters["model"]["architecture"] = model - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - parameters["output_dir"] = outputDir # this is in inference mode - InferenceManager( - dataframe=training_data, - modelDir=outputDir + "," + outputDir, - parameters=parameters, - device=device, - ) +# def test_train_segmentation_rad_2d(device): +# print("03: Starting 2D Rad segmentation tests") +# # read and parse csv +# parameters = parseConfig( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 3 +# 
parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["data_preprocessing"]["resize_image"] = [224, 224] +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # read and initialize parameters for specific data dimension +# for model in all_models_segmentation: +# if model == "imagenet_unet": +# # imagenet_unet encoder needs to be toned down for small patch size +# parameters["model"]["encoder_name"] = "mit_b0" +# parameters["model"]["encoder_depth"] = 3 +# parameters["model"]["decoder_channels"] = (64, 32, 16) +# parameters["model"]["final_layer"] = random.choice( +# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] +# ) +# parameters["model"]["converter_type"] = random.choice( +# ["acs", "soft", "conv3d"] +# ) + +# if model == "dynunet": +# # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py +# parameters["model"]["kernel_size"] = (3, 3, 3, 1) +# parameters["model"]["strides"] = (1, 1, 1, 1) +# parameters["model"]["deep_supervision"] = False + +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_sdnet_rad_2d(device): +# print("04: Starting 2D Rad segmentation tests") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# # patch_size is custom for sdnet +# parameters["patch_size"] = [224, 224, 1] +# parameters["batch_size"] = 2 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["num_channels"] = 1 +# parameters["model"]["architecture"] = "sdnet" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# sanitize_outputDir() + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_rad_3d(device): +# print("05: Starting 3D Rad segmentation tests") +# # read and parse csv +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["class_list"] = [0, 1] +# parameters["model"]["final_layer"] = "softmax" +# parameters["model"]["amp"] = True +# parameters["in_memory"] = True +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train 
for single epoch +# for model in all_models_segmentation: +# if model == "imagenet_unet": +# # imagenet_unet encoder needs to be toned down for small patch size +# parameters["model"]["encoder_name"] = "mit_b0" +# with pytest.raises(Exception) as exc_info: +# _ = global_models_dict[model](parameters) +# print("Exception raised:", exc_info.value) +# parameters["model"]["encoder_name"] = "resnet34" +# parameters["model"]["encoder_depth"] = 3 +# parameters["model"]["decoder_channels"] = (64, 32, 16) +# parameters["model"]["final_layer"] = random.choice( +# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] +# ) +# parameters["model"]["converter_type"] = random.choice( +# ["acs", "soft", "conv3d"] +# ) + +# if model == "dynunet": +# # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py +# parameters["model"]["kernel_size"] = (3, 3, 3, 1) +# parameters["model"]["strides"] = (1, 1, 1, 1) +# parameters["model"]["deep_supervision"] = False + +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_regression_rad_2d(device): +# print("06: Starting 2D Rad regression tests") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_regression.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["amp"] = False +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_regression.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] +# parameters["scaling_factor"] = 1 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for model in all_models_regression: +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_regression_rad_2d_imagenet(device): +# print("07: Starting 2D Rad regression tests for imagenet models") +# # read and initialize parameters for specific data dimension +# print("Starting 2D Rad regression tests for imagenet models") +# parameters = ConfigManager( +# testingDir + "/config_regression.yaml", version_check_flag=False +# ) +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["amp"] = False +# parameters["model"]["print_summary"] = False +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_regression.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] +# 
parameters["scaling_factor"] = 1 +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for model in all_models_classification: +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_regression_brainage_rad_2d(device): +# print("08: Starting brain age tests") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_regression.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["amp"] = False +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_regression.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] +# parameters["scaling_factor"] = 1 +# parameters["model"]["architecture"] = "brain_age" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# # parameters_temp = copy.deepcopy(parameters) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# # file_config_temp = write_temp_config_path(parameters_temp) +# model_path = os.path.join(outputDir, "brain_age_best.pth.tar") +# config_path = os.path.join(outputDir, "parameters.pkl") +# optimization_result = post_training_model_optimization(model_path, config_path) +# assert optimization_result == False, "Optimization should fail" + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_regression_rad_3d(device): +# print("09: Starting 3D Rad regression tests") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_regression.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_regression.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for model in all_models_regression: +# if "efficientnet" in model: +# parameters["patch_size"] = [16, 16, 16] +# else: +# parameters["patch_size"] = patch_size["3D"] + +# if model == "imagenet_unet": +# parameters["model"]["depth"] = 2 +# parameters["model"]["decoder_channels"] = [32, 16] +# parameters["model"]["encoder_weights"] = "None" +# parameters["model"]["converter_type"] = random.choice( +# ["acs", "soft", "conv3d"] +# ) +# parameters["model"]["architecture"] = 
model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_classification_rad_2d(device): +# print("10: Starting 2D Rad classification tests") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["track_memory_usage"] = True +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for model in all_models_regression: +# if model == "imagenet_unet": +# parameters["model"]["depth"] = 2 +# parameters["model"]["decoder_channels"] = [32, 16] +# parameters["model"]["encoder_weights"] = "None" +# parameters["model"]["converter_type"] = random.choice( +# ["acs", "soft", "conv3d"] +# ) +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# # ensure sigmoid and softmax activations are tested for imagenet models +# for activation_type in ["sigmoid", "softmax"]: +# parameters["model"]["architecture"] = "imagenet_vgg11" +# parameters["model"]["final_layer"] = activation_type +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_classification_rad_3d(device): +# print("11: Starting 3D Rad classification tests") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# # loop through selected models and train for single epoch +# for model in all_models_regression: +# if "efficientnet" in model: +# parameters["patch_size"] = [16, 16, 16] +# else: +# parameters["patch_size"] = patch_size["3D"] +# if model == "imagenet_unet": +# parameters["model"]["encoder_name"] = "efficientnet-b0" +# parameters["model"]["depth"] = 1 +# 
parameters["model"]["decoder_channels"] = [64] +# parameters["model"]["final_layer"] = random.choice( +# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] +# ) +# parameters["model"]["converter_type"] = random.choice( +# ["acs", "soft", "conv3d"] +# ) +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_resume_inference_classification_rad_3d(device): +# print("12: Starting 3D Rad classification tests for resume and reset") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# model = all_models_regression[0] +# parameters["model"]["architecture"] = model +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# ## testing resume with parameter updates +# parameters["num_epochs"] = 2 +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# parameters["model"]["save_at_every_epoch"] = True +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=True, +# reset=False, +# ) + +# ## testing resume without parameter updates +# parameters["num_epochs"] = 1 +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=False, +# ) + +# parameters["output_dir"] = outputDir # this is in inference mode +# InferenceManager( +# dataframe=training_data, +# modelDir=outputDir, +# parameters=parameters, +# device=device, +# ) +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_optimize_classification_rad_3d(device): +# print("13: Starting 3D Rad segmentation tests for optimization") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["model"]["architecture"] = all_models_regression[0] +# 
parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# # parameters_temp = copy.deepcopy(parameters) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# # file_config_temp = write_temp_config_path(parameters_temp) +# model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") +# config_path = os.path.join(outputDir, "parameters.pkl") +# optimization_result = post_training_model_optimization(model_path, config_path) +# assert optimization_result == True, "Optimization should pass" + +# ## testing inference +# for model_type in all_model_type: +# parameters["model"]["type"] = model_type +# parameters["output_dir"] = outputDir # this is in inference mode +# InferenceManager( +# dataframe=training_data, +# modelDir=outputDir, +# parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_optimize_segmentation_rad_2d(device): +# print("14: Starting 2D Rad segmentation tests for optimization") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["patch_size"] = patch_size["2D"] +# parameters["modality"] = "rad" +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["save_output"] = True +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = ["dice"] +# parameters["model"]["architecture"] = "resunet" +# parameters["model"]["onnx_export"] = True +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# ## testing inference +# for model_type in all_model_type: +# parameters["model"]["type"] = model_type +# parameters["output_dir"] = outputDir # this is in inference mode +# InferenceManager( +# dataframe=training_data, +# modelDir=outputDir, +# parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_classification_with_logits_single_fold_rad_3d(device): +# print("15: Starting 3D Rad classification tests for single fold logits inference") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["final_layer"] = "logits" +# # loop through selected models and train for single epoch +# model = all_models_regression[0] +# parameters["model"]["architecture"] = model +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# ## add stratified splitting +# parameters["nested_training"]["stratified"] = True + +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = 
populate_header_in_parameters(parameters, parameters["headers"]) +# # duplicate the data to test stratified sampling +# training_data_duplicate = training_data._append(training_data) +# for _ in range(1): +# training_data_duplicate = training_data_duplicate._append( +# training_data_duplicate +# ) +# training_data_duplicate.reset_index(drop=True, inplace=True) +# # ensure subjects are not duplicated +# training_data_duplicate["SubjectID"] = training_data_duplicate.index + +# # ensure every part of the code is tested +# for folds in [2, 1, -5]: +# ## add stratified folding information +# parameters["nested_training"]["testing"] = folds +# parameters["nested_training"]["validation"] = folds if folds != 1 else -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data_duplicate, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# ## this is to test if inference can run without having ground truth column +# training_data.drop("ValueToPredict", axis=1, inplace=True) +# training_data.drop("Label", axis=1, inplace=True) +# temp_infer_csv = os.path.join(outputDir, "temp_infer_csv.csv") +# training_data.to_csv(temp_infer_csv, index=False) +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV(temp_infer_csv) +# parameters["output_dir"] = outputDir # this is in inference mode +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["final_layer"] = "logits" +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# model = all_models_regression[0] +# parameters["model"]["architecture"] = model +# parameters["model"]["onnx_export"] = False +# InferenceManager( +# dataframe=training_data, +# modelDir=outputDir, +# parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): +# print("16: Starting 3D Rad classification tests for multi-fold logits inference") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["final_layer"] = "logits" +# # necessary for n-fold cross-validation inference +# parameters["nested_training"]["validation"] = 2 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# model = all_models_regression[0] +# parameters["model"]["architecture"] = model +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# 
parameters["output_dir"] = outputDir # this is in inference mode +# InferenceManager( +# dataframe=training_data, +# modelDir=outputDir + "," + outputDir, +# parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_scheduler_classification_rad_2d(device): +# print("17: Starting 2D Rad segmentation tests for scheduler") +# # read and initialize parameters for specific data dimension +# # loop through selected models and train for single epoch +# for scheduler in global_schedulers_dict: +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "densenet121" +# parameters["model"]["norm_type"] = "instance" +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["scheduler"] = {} +# parameters["scheduler"]["type"] = scheduler +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# ## ensure parameters are parsed every single time +# file_config_temp = write_temp_config_path(parameters) + +# parameters = ConfigManager(file_config_temp, version_check_flag=False) +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_optimizer_classification_rad_2d(device): +# print("18: Starting 2D Rad classification tests for optimizer") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "densenet121" +# parameters["model"]["norm_type"] = "none" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for optimizer in global_optimizer_dict: +# parameters["optimizer"] = {} +# parameters["optimizer"]["type"] = optimizer +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.exists(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# Path(outputDir).mkdir(parents=True, exist_ok=True) +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_clip_train_classification_rad_3d(device): +# print("19: Starting 3D Rad classification tests for clipping") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( 
+# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters["model"]["architecture"] = "vgg16" +# parameters["model"]["norm_type"] = "None" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for clip_mode in all_clip_modes: +# parameters["clip_mode"] = clip_mode +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# sanitize_outputDir() + +# print("passed") + + +# def test_train_normtype_segmentation_rad_3d(device): +# print("20: Starting 3D Rad segmentation tests for normtype") +# # read and initialize parameters for specific data dimension +# # read and parse csv +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["class_list"] = [0, 1] +# parameters["model"]["amp"] = True +# parameters["save_output"] = True +# parameters["data_postprocessing"] = {"fill_holes"} +# parameters["in_memory"] = True +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) + +# # these should raise exceptions +# for norm_type in ["none", None]: +# parameters["model"]["norm_type"] = norm_type +# file_config_temp = write_temp_config_path(parameters) +# with pytest.raises(Exception) as exc_info: +# parameters = ConfigManager(file_config_temp, version_check_flag=False) + +# print("Exception raised:", exc_info.value) + +# # loop through selected models and train for single epoch +# for norm in all_norm_types: +# for model in ["resunet", "unet", "fcn", "unetr"]: +# parameters["model"]["architecture"] = model +# parameters["model"]["norm_type"] = norm +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# Path(outputDir).mkdir(parents=True, exist_ok=True) +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_metrics_segmentation_rad_2d(device): +# print("21: Starting 2D Rad segmentation tests for metrics") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = 
patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["data_postprocessing"] = {"mapping": {0: 0, 255: 1}} +# parameters["model"]["amp"] = True +# parameters["save_output"] = True +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = [ +# "dice", +# "hausdorff", +# "hausdorff95", +# "normalized_surface_dice", +# "sensitivity", +# "sensitivity_per_label", +# "specificity_segmentation", +# "specificity_segmentation_per_label", +# "jaccard", +# "jaccard_per_label", +# ] +# parameters["model"]["architecture"] = "resunet" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# file_config_temp = write_temp_config_path(parameters) + +# parameters = ConfigManager(file_config_temp, version_check_flag=False) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_metrics_regression_rad_2d(device): +# print("22: Starting 2D Rad regression tests for metrics") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_regression.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_regression.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["norm_type"] = "instance" +# parameters["model"]["amp"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "vgg11" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = True +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_losses_segmentation_rad_2d(device): +# print("23: Starting 2D Rad segmentation tests for losses") + +# # healper function to read and parse yaml and return parameters +# def get_parameters_after_alteration(loss_type: str) -> dict: +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# parameters["loss_function"] = loss_type +# file_config_temp = write_temp_config_path(parameters) +# # read and parse csv +# parameters = ConfigManager(file_config_temp, version_check_flag=True) +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# # disabling amp because some losses do not support Half, yet +# parameters["model"]["amp"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "resunet" +# parameters["metrics"] = ["dice"] +# 
parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# return parameters, training_data + +# # loop through selected models and train for single epoch +# for loss_type in [ +# "dc", +# "dc_log", +# "dcce", +# "dcce_logits", +# "tversky", +# "focal", +# "dc_focal", +# "mcc", +# "mcc_log", +# ]: +# parameters, training_data = get_parameters_after_alteration(loss_type) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_config_read(): +# print("24: Starting testing reading configuration") +# parameters = ConfigManager( +# os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False +# ) +# parameters["data_preprocessing"]["resize_image"] = [128, 128] + +# file_config_temp = write_temp_config_path(parameters) + +# # read and parse csv +# parameters = ConfigManager(file_config_temp, version_check_flag=True) + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# assert parameters is not None, "parameters is None" +# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") +# assert data_loader is not None, "data_loader is None" + +# os.remove(file_config_temp) + +# # ensure resize_image is triggered +# parameters["data_preprocessing"].pop("resample") +# parameters["data_preprocessing"].pop("resample_min") +# parameters["data_preprocessing"]["resize_image"] = [128, 128] +# parameters["model"]["print_summary"] = False + +# with open(file_config_temp, "w") as file: +# yaml.dump(parameters, file) + +# parameters = ConfigManager(file_config_temp, version_check_flag=True) + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# assert parameters is not None, "parameters is None" +# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") +# assert data_loader is not None, "data_loader is None" + +# os.remove(file_config_temp) + +# # ensure resize_patch is triggered +# parameters["data_preprocessing"].pop("resize_image") +# parameters["data_preprocessing"]["resize_patch"] = [64, 64] + +# with open(file_config_temp, "w") as file: +# yaml.dump(parameters, file) + +# parameters = ConfigManager(file_config_temp, version_check_flag=True) + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# assert parameters is not None, "parameters is None" +# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") +# assert data_loader is not None, "data_loader is None" + +# os.remove(file_config_temp) + +# # ensure resize_image is triggered +# parameters["data_preprocessing"].pop("resize_patch") +# parameters["data_preprocessing"]["resize"] = [64, 64] + +# with open(file_config_temp, "w") as file: +# yaml.dump(parameters, file) + +# parameters = ConfigManager(file_config_temp, version_check_flag=True) + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# assert parameters is not None, "parameters is None" +# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") +# assert data_loader is not None, "data_loader is None" + +# os.remove(file_config_temp) + +# 
sanitize_outputDir() + +# print("passed") + + +# def test_generic_cli_function_preprocess(): +# print("25: Starting testing cli function preprocess") +# file_config = os.path.join(testingDir, "config_segmentation.yaml") +# sanitize_outputDir() +# file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") + +# input_data_df, _ = parseTrainingCSV(file_data, train=False) +# # add random metadata to ensure it gets preserved +# input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] +# input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) +# input_data_df["metadata_test_int"] = np.random.randint( +# 0, 100, input_data_df.shape[0] +# ) +# temp_csv = os.path.join(outputDir, "temp.csv") +# input_data_df.to_csv(temp_csv) + +# parameters = ConfigManager(file_config) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = "[0, 255||125]" +# # disabling amp because some losses do not support Half, yet +# parameters["model"]["amp"] = False +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "unet" +# parameters["metrics"] = ["dice"] +# parameters["patch_sampler"] = { +# "type": "label", +# "enable_padding": True, +# "biased_sampling": True, +# } +# parameters["weighted_loss"] = True +# parameters["save_output"] = True +# parameters["data_preprocessing"]["to_canonical"] = None +# parameters["data_preprocessing"]["rgba_to_rgb"] = None + +# file_config_temp = write_temp_config_path(parameters) + +# preprocess_and_save(temp_csv, file_config_temp, outputDir) +# training_data, parameters["headers"] = parseTrainingCSV( +# outputDir + "/data_processed.csv" +# ) + +# # check that the length of training data is what we expect +# assert ( +# len(training_data) == input_data_df.shape[0] +# ), "Number of subjects in dataframe is not same as that of input dataframe" +# assert ( +# len(training_data.columns) == len(input_data_df.columns) + 1 +# ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column +# sanitize_outputDir() + +# ## regression/classification preprocess +# file_config = os.path.join(testingDir, "config_regression.yaml") +# parameters = ConfigManager(file_config) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["amp"] = False +# # read and parse csv +# parameters["model"]["num_channels"] = 3 +# parameters["scaling_factor"] = 1 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["data_preprocessing"]["to_canonical"] = None +# parameters["data_preprocessing"]["rgba_to_rgb"] = None +# file_data = os.path.join(inputDir, "train_2d_rad_regression.csv") +# input_data_df, _ = parseTrainingCSV(file_data, train=False) +# # add random metadata to ensure it gets preserved +# input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] +# input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) +# input_data_df["metadata_test_int"] = np.random.randint( +# 0, 100, input_data_df.shape[0] +# ) +# input_data_df.to_csv(temp_csv) + +# # store this separately for preprocess testing +# with open(file_config_temp, "w") as outfile: +# yaml.dump(parameters, outfile, default_flow_style=False) + +# preprocess_and_save(temp_csv, file_config_temp, 
outputDir) +# training_data, parameters["headers"] = parseTrainingCSV( +# outputDir + "/data_processed.csv" +# ) + +# # check that the length of training data is what we expect +# assert ( +# len(training_data) == input_data_df.shape[0] +# ), "Number of subjects in dataframe is not same as that of input dataframe" +# assert ( +# len(training_data.columns) == len(input_data_df.columns) + 1 +# ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_cli_function_mainrun(device): +# print("26: Starting testing cli function main_run") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) + +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["num_epochs"] = 1 +# parameters["nested_training"]["testing"] = 1 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = ["dice"] +# parameters["model"]["architecture"] = "unet" + +# file_config_temp = write_temp_config_path(parameters) + +# file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") + +# main_run( +# file_data, file_config_temp, outputDir, True, device, resume=False, reset=True +# ) +# sanitize_outputDir() + +# with open(file_config_temp, "w") as file: +# yaml.dump(parameters, file) + +# # testing train/valid split +# main_run( +# file_data + "," + file_data, +# file_config_temp, +# outputDir, +# True, +# device, +# resume=False, +# reset=True, +# ) + +# with open(file_config_temp, "w") as file: +# yaml.dump(parameters, file) + +# # testing train/valid/test split with resume +# main_run( +# file_data + "," + file_data + "," + file_data, +# file_config_temp, +# outputDir, +# True, +# device, +# resume=True, +# reset=False, +# ) +# sanitize_outputDir() + +# print("passed") + + +# def test_dataloader_construction_train_segmentation_3d(device): +# print("27: Starting 3D Rad segmentation tests") +# # read and parse csv +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# params_all_preprocessing_and_augs = ConfigManager( +# os.path.join(baseConfigDir, "config_all_options.yaml") +# ) + +# # take preprocessing and augmentations from all options +# for key in ["data_preprocessing", "data_augmentation"]: +# parameters[key] = params_all_preprocessing_and_augs[key] + +# # customize parameters to maximize test coverage +# parameters["data_preprocessing"].pop("normalize", None) +# parameters["data_preprocessing"]["normalize_nonZero"] = None +# parameters["data_preprocessing"]["default_probability"] = 1 +# parameters.pop("nested_training", None) +# parameters["nested_training"] = {} +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -5 + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["save_training"] = True +# parameters["save_output"] = True +# parameters["model"]["dimension"] = 3 +# parameters["model"]["class_list"] = [0, 1] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 
len(parameters["headers"]["channelHeaders"]) +# parameters["model"]["architecture"] = "unet" +# parameters["weighted_loss"] = False +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} +# parameters["data_postprocessing"]["fill_holes"] = True +# parameters["data_postprocessing"]["cca"] = True +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_preprocess_functions(): +# print("28: Starting testing preprocessing functions") +# # initialize an input which has values between [-1,1] +# # checking tensor with last dimension of size 1 +# input_tensor = torch.rand(4, 256, 256, 1) +# input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) +# assert input_transformed.shape[0] == 3, "Number of channels is not 3" +# assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + +# input_tensor = torch.rand(3, 256, 256, 1) +# input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) +# assert input_transformed.shape[0] == 4, "Number of channels is not 4" +# assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + +# input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 +# input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) +# input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 +# input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) +# input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) +# input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) +# parameters_dict = {} +# parameters_dict["min"] = 0.25 +# parameters_dict["max"] = 0.75 +# input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( +# input_tensor +# ) +# assert ( +# torch.count_nonzero( +# input_transformed[input_transformed < parameters_dict["min"]] +# > parameters_dict["max"] +# ) +# == 0 +# ), "Input should be thresholded" + +# input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) +# assert ( +# torch.count_nonzero( +# input_transformed[input_transformed < parameters_dict["min"]] +# > parameters_dict["max"] +# ) +# == 0 +# ), "Input should be clipped" + +# non_zero_normalizer = global_preprocessing_dict["normalize_nonZero_masked"] +# input_transformed = non_zero_normalizer(input_tensor) +# non_zero_normalizer = global_preprocessing_dict["normalize_positive"] +# input_transformed = non_zero_normalizer(input_tensor) +# non_zero_normalizer = global_preprocessing_dict["normalize_nonZero"] +# input_transformed = non_zero_normalizer(input_tensor) + +# ## stain_normalization checks +# input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 +# training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") +# parameters_temp = {} +# parameters_temp["data_preprocessing"] = {} +# parameters_temp["data_preprocessing"]["stain_normalizer"] = { +# "target": training_data["Channel_0"][0] +# } +# for extractor in ["ruifrok", "macenko", "vahadane"]: +# parameters_temp["data_preprocessing"]["stain_normalizer"][ +# "extractor" +# ] = extractor +# non_zero_normalizer = 
global_preprocessing_dict["stain_normalizer"]( +# parameters_temp["data_preprocessing"]["stain_normalizer"] +# ) +# input_transformed = non_zero_normalizer(input_tensor) + +# ## histogram matching tests +# # histogram equalization +# input_tensor = torch.rand(1, 64, 64, 64) +# parameters_temp = {} +# parameters_temp["data_preprocessing"] = {} +# parameters_temp["data_preprocessing"]["histogram_matching"] = {} +# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( +# parameters_temp["data_preprocessing"]["histogram_matching"] +# ) +# input_transformed = non_zero_normalizer(input_tensor) +# # adaptive histogram equalization +# parameters_temp = {} +# parameters_temp["data_preprocessing"] = {} +# parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} +# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( +# parameters_temp["data_preprocessing"]["histogram_matching"] +# ) +# input_transformed = non_zero_normalizer(input_tensor) +# # histogram matching +# training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") +# parameters_temp = {} +# parameters_temp["data_preprocessing"] = {} +# parameters_temp["data_preprocessing"]["histogram_matching"] = { +# "target": training_data["Channel_0"][0] +# } +# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( +# parameters_temp["data_preprocessing"]["histogram_matching"] +# ) +# input_transformed = non_zero_normalizer(input_tensor) + +# # fill holes +# input_tensor = torch.rand(1, 256, 256, 256) > 0.5 +# input_transformed = fill_holes(input_tensor) + +# ## CCA tests +# # 3d +# input_tensor = torch.rand(1, 256, 256, 256) > 0.5 +# input_transformed = cca(input_tensor) +# # 2d +# input_tensor = torch.rand(1, 256, 256) > 0.5 +# input_transformed = cca(input_tensor) +# # 2d rgb +# input_tensor = torch.rand(1, 3, 256, 256) > 0.5 +# input_transformed = cca(input_tensor) + +# input_tensor = torch.rand(1, 256, 256, 256) +# cropper = global_preprocessing_dict["crop_external_zero_planes"]( +# patch_size=[128, 128, 128] +# ) +# input_transformed = cropper(input_tensor) + +# cropper = global_preprocessing_dict["crop"]([64, 64, 64]) +# input_transformed = cropper(input_tensor) +# assert input_transformed.shape == (1, 128, 128, 128), "Cropping should work" + +# cropper = global_preprocessing_dict["centercrop"]([128, 128, 128]) +# input_transformed = cropper(input_tensor) +# assert input_transformed.shape == (1, 128, 128, 128), "Center-crop should work" + +# # test pure morphological operations +# input_tensor_3d = torch.rand(1, 1, 256, 256, 256) +# input_tensor_2d = torch.rand(1, 3, 256, 256) +# for mode in ["dilation", "erosion", "opening", "closing"]: +# input_transformed_3d = torch_morphological(input_tensor_3d, mode=mode) +# assert len(input_transformed_3d.shape) == 5, "Output should be 5D" +# input_transformed_2d = torch_morphological(input_tensor_2d, mode=mode) +# assert len(input_transformed_2d.shape) == 4, "Output should be 4D" + +# # test for failure +# with pytest.raises(Exception) as exc_info: +# input_tensor_4d = torch.rand(1, 1, 32, 32, 32, 32) +# input_transformed_3d = torch_morphological(input_tensor_4d) + +# print("Exception raised:", exc_info.value) + +# # test obtaining arrays +# input_tensor_3d = torch.rand(256, 256, 256) +# input_array = get_array_from_image_or_tensor(input_tensor_3d) +# assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" +# input_image = sitk.GetImageFromArray(input_array) +# input_array = 
get_array_from_image_or_tensor(input_image) +# assert isinstance(input_array, np.ndarray), "Array should be obtained from image" +# input_array = get_array_from_image_or_tensor(input_array) +# assert isinstance(input_array, np.ndarray), "Array should be obtained from array" + +# with pytest.raises(Exception) as exc_info: +# input_list = [0, 1] +# input_array = get_array_from_image_or_tensor(input_list) +# exception_raised = exc_info.value +# print("Exception raised: ", exception_raised) + +# ## image rescaling test +# input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) +# # try out different options +# for params in [ +# {}, +# None, +# {"in_min_max": [5, 250], "out_min_max": [-1, 2]}, +# {"out_min_max": [0, 1], "percentiles": [5, 95]}, +# ]: +# rescaler = global_preprocessing_dict["rescale"](params) +# input_transformed = rescaler(input_tensor) +# assert ( +# input_transformed.min() >= rescaler.out_min_max[0] +# ), "Rescaling should work for min" +# assert ( +# input_transformed.max() <= rescaler.out_min_max[1] +# ), "Rescaling should work for max" + +# # tests for histology alpha check +# input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) +# _ = get_nonzero_percent(input_tensor) +# assert not ( +# alpha_rgb_2d_channel_check(input_tensor) +# ), "Alpha channel check should work for 4D tensors" +# input_tensor = torch.randint(0, 256, (64, 64, 64)) +# assert not ( +# alpha_rgb_2d_channel_check(input_tensor) +# ), "Alpha channel check should work for 3D images" +# input_tensor = torch.randint(0, 256, (64, 64, 4)) +# assert not ( +# alpha_rgb_2d_channel_check(input_tensor) +# ), "Alpha channel check should work for generic 4D images" +# input_tensor = torch.randint(0, 256, (64, 64)) +# assert alpha_rgb_2d_channel_check( +# input_tensor +# ), "Alpha channel check should work for grayscale 2D images" +# input_tensor = torch.randint(0, 256, (64, 64, 3)) +# assert alpha_rgb_2d_channel_check( +# input_tensor +# ), "Alpha channel check should work for RGB images" +# input_tensor = torch.randint(0, 256, (64, 64, 4)) +# input_tensor[:, :, 3] = 255 +# assert alpha_rgb_2d_channel_check( +# input_tensor +# ), "Alpha channel check should work for RGBA images" +# input_array = torch.randint(0, 256, (64, 64, 3)).numpy() +# temp_filename = os.path.join(outputDir, "temp.png") +# cv2.imwrite(temp_filename, input_array) +# temp_filename_tiff = convert_to_tiff(temp_filename, outputDir) +# assert os.path.exists(temp_filename_tiff), "Tiff file should be created" + +# # resize tests +# input_tensor = np.random.randint(0, 255, size=(20, 20, 20)) +# input_image = sitk.GetImageFromArray(input_tensor) +# expected_output = (10, 10, 10) +# input_transformed = resize_image(input_image, expected_output) +# assert input_transformed.GetSize() == expected_output, "Resize should work" +# input_tensor = np.random.randint(0, 255, size=(20, 20)) +# input_image = sitk.GetImageFromArray(input_tensor) +# expected_output = [10, 10] +# output_size_dict = {"resize": expected_output} +# input_transformed = resize_image(input_image, output_size_dict) +# assert list(input_transformed.GetSize()) == expected_output, "Resize should work" + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_augmentation_functions(): +# print("29: Starting testing augmentation functions") +# params_all_preprocessing_and_augs = ConfigManager( +# os.path.join(baseConfigDir, "config_all_options.yaml") +# ) + +# # this is for rgb augmentation +# input_tensor = torch.rand(3, 128, 128, 1) +# temp = global_augs_dict["colorjitter"]( +# 
params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] +# ) +# output_tensor = None +# output_tensor = temp(input_tensor) +# assert output_tensor != None, "RGB Augmentation should work" + +# # ensuring all code paths are covered +# for key in ["brightness", "contrast", "saturation", "hue"]: +# params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"][ +# key +# ] = 0.25 +# temp = global_augs_dict["colorjitter"]( +# params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] +# ) +# output_tensor = None +# output_tensor = temp(input_tensor) +# assert output_tensor != None, "RGB Augmentation should work" + +# # testing HED transforms with different options +# input_tensor = torch.rand(3, 128, 128, 1) +# params = { +# "data_augmentation": { +# "hed_transform": {}, +# # "hed_transform_light": {}, +# # "hed_transform_heavy": {}, +# } +# } +# temp = global_augs_dict["hed_transform"]( +# params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] +# ) +# ranges = [ +# "haematoxylin_bias_range", +# "eosin_bias_range", +# "dab_bias_range", +# "haematoxylin_sigma_range", +# "eosin_sigma_range", +# "dab_sigma_range", +# ] + +# default_range = [-0.1, 0.1] +# for key in ranges: +# params["data_augmentation"]["hed_transform"].setdefault(key, default_range) + +# params["data_augmentation"]["hed_transform"].setdefault( +# "cutoff_range", [0.05, 0.95] +# ) + +# # Check if the params are correctly set for each augmentation type +# assert params["data_augmentation"]["hed_transform"] == { +# "haematoxylin_bias_range": [-0.1, 0.1], +# "eosin_bias_range": [-0.1, 0.1], +# "dab_bias_range": [-0.1, 0.1], +# "haematoxylin_sigma_range": [-0.1, 0.1], +# "eosin_sigma_range": [-0.1, 0.1], +# "dab_sigma_range": [-0.1, 0.1], +# "cutoff_range": [0.05, 0.95], +# } +# temp = global_augs_dict["hed_transform"]( +# params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] +# ) +# output_tensor = None +# output_tensor = temp(input_tensor) +# assert output_tensor != None, "HED Augmentation should work" + +# # this is for all other augmentations +# input_tensor = torch.rand(3, 128, 128, 128) +# for aug in params_all_preprocessing_and_augs["data_augmentation"]: +# aug_lower = aug.lower() +# output_tensor = None +# if aug_lower in global_augs_dict: +# output_tensor = global_augs_dict[aug]( +# params_all_preprocessing_and_augs["data_augmentation"][aug_lower] +# )(input_tensor) +# assert output_tensor != None, "Augmentation should work" + +# # additional test for elastic +# params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] +# for key_to_pop in ["num_control_points", "max_displacement", "locked_borders"]: +# params_elastic.pop(key_to_pop, None) +# output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) +# assert output_tensor != None, "Augmentation for base elastic transform should work" + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_checkpointing_segmentation_rad_2d(device): +# print("30: Starting 2D Rad segmentation tests for metrics") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# parameters["patch_sampler"] = { +# "type": "label", +# "enable_padding": True, +# "biased_sampling": True, +# } +# file_config_temp = write_temp_config_path(parameters) +# parameters = ConfigManager(file_config_temp, version_check_flag=False) + +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + 
"/train_2d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["num_epochs"] = 1 +# parameters["nested_training"]["testing"] = 1 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = [ +# "dice", +# "dice_per_label", +# "hausdorff", +# "hausdorff95", +# "hd95_per_label", +# "hd100_per_label", +# "normalized_surface_dice", +# "normalized_surface_dice_per_label", +# "sensitivity", +# "sensitivity_per_label", +# "specificity_segmentation", +# "specificity_segmentation_per_label", +# "jaccard", +# "jaccard_per_label", +# ] +# parameters["model"]["architecture"] = "unet" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# parameters["num_epochs"] = 2 +# parameters["nested_training"]["validation"] = -2 +# parameters["nested_training"]["testing"] = 1 +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=False, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_model_patch_divisibility(): +# print("31: Starting patch divisibility tests") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# _, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["model"]["architecture"] = "unet" +# parameters["patch_size"] = [127, 127, 1] +# parameters["num_epochs"] = 1 +# parameters["nested_training"]["testing"] = 1 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = ["dice"] +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) + +# # this assertion should fail +# with pytest.raises(BaseException) as _: +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# parameters["model"]["architecture"] = "uinc" +# parameters["model"]["base_filters"] = 11 + +# # this assertion should fail +# with pytest.raises(BaseException) as _: +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_one_hot_logic(): +# print("32: Starting one hot logic tests") +# random_array = np.random.randint(5, size=(20, 20, 20)) +# img = sitk.GetImageFromArray(random_array) +# img_tensor = get_tensor_from_image(img).to(torch.float16) +# img_tensor = img_tensor.unsqueeze(0).unsqueeze(0) + +# class_list = [*range(0, np.max(random_array) + 1)] +# img_tensor_oh = one_hot(img_tensor, class_list) +# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) +# comparison = random_array == img_tensor_oh_rev_array +# assert comparison.all(), "Arrays are not equal" + +# class_list = ["0", "1||2||3", np.max(random_array)] +# img_tensor_oh = one_hot(img_tensor, class_list) +# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) + +# # check for background +# 
comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0)
+# assert comparison.all(), "Arrays at '0' are not equal"
+
+# # check last foreground
+# comparison = (random_array == np.max(random_array)) == (
+# img_tensor_oh_rev_array == len(class_list) - 1
+# )
+# assert comparison.all(), "Arrays at final foreground are not equal"
+
+# # check combined foreground
+# combined_array = np.logical_or(
+# np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3)
+# )
+# comparison = combined_array == (img_tensor_oh_rev_array == 1)
+# assert comparison.all(), "Arrays at the combined foreground are not equal"
+
+# parameters = {"data_postprocessing": {}}
+# mapped_output = get_mapped_label(
+# torch.from_numpy(img_tensor_oh_rev_array), parameters
+# )
+
+# parameters = {}
+# mapped_output = get_mapped_label(
+# torch.from_numpy(img_tensor_oh_rev_array), parameters
+# )
+
+# parameters = {"data_postprocessing": {"mapping": {0: 0, 1: 1, 2: 5}}}
+# mapped_output = get_mapped_label(
+# torch.from_numpy(img_tensor_oh_rev_array), parameters
+# )
+
+# for key, value in parameters["data_postprocessing"]["mapping"].items():
+# comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value)
+# assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value)
+
+# # check the case where 0 is present as an int in a special case
+# class_list = [0, "1||2||3", np.max(random_array)]
+# img_tensor_oh = one_hot(img_tensor, class_list)
+# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+
+# # check for background
+# comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0)
+# assert comparison.all(), "Arrays at '0' are not equal"
+
+# # check the case where 0 is absent from class_list
+# class_list = ["1||2||3", np.max(random_array)]
+# img_tensor_oh = one_hot(img_tensor, class_list)
+# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+
+# # check last foreground
+# comparison = (random_array == np.max(random_array)) == (
+# img_tensor_oh_rev_array == len(class_list)
+# )
+# assert comparison.all(), "Arrays at final foreground are not equal"
+
+# # check combined foreground
+# combined_array = np.logical_or(
+# np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3)
+# )
+# comparison = combined_array == (img_tensor_oh_rev_array == 1)
+# assert comparison.all(), "Arrays at the combined foreground are not equal"
+
+# sanitize_outputDir()
+
+# print("passed")
+
+
+# def test_generic_anonymizer():
+# print("33: Starting anonymizer tests")
+# input_file = get_testdata_file("MR_small.dcm")
+
+# output_file = os.path.join(outputDir, "MR_small_anonymized.dcm")
+
+# config_file = os.path.join(baseConfigDir, "config_anonymizer.yaml")
+
+# run_anonymizer(input_file, output_file, config_file, "rad")
+# assert os.path.exists(output_file), "Anonymized file does not exist"
+
+# # test defaults
+# run_anonymizer(input_file, output_file, None, "rad")
+# assert os.path.exists(output_file), "Anonymized file does not exist"
+
+# # test nifti conversion
+# config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml")
+# with open(config_file, "r") as file_data:
+# yaml_data = file_data.read()
+# parameters = yaml.safe_load(yaml_data)
+# parameters["convert_to_nifti"] = True
+# with open(config_file_for_nifti, "w") as file:
+# yaml.dump(parameters, file)
+
+# # for nifti conversion, the input needs to be in a dir
+# input_folder_for_nifti = os.path.join(outputDir, "nifti_input")
+# 
Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True) +# shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm")) + +# output_file = os.path.join(outputDir, "MR_small.nii.gz") + +# run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad") +# assert os.path.exists(output_file), "Anonymized file does not exist" + +# if not os.path.exists(output_file): +# raise Exception("Output NIfTI file was not created") + +# input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff") +# output_file_histo = os.path.join(outputDir, "histo_anon.tiff") +# # this assertion should fail since histo anonymizer is not implementer +# with pytest.raises(BaseException) as exc_info: +# run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo") +# assert os.path.exists(output_file_histo), "Anonymized file does not exist" +# print("Exception raised: ", exc_info.value) +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_segmentation_histology_2d(device): +# print("34: Starting histology train/inference segmentation tests") +# # overwrite previous results +# sanitize_outputDir() +# output_dir_patches = os.path.join(outputDir, "histo_patches") +# if os.path.isdir(output_dir_patches): +# shutil.rmtree(output_dir_patches) +# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) +# output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") +# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) + +# parameters_patch = {} +# # extracting minimal number of patches to ensure that the test does not take too long +# parameters_patch["num_patches"] = 10 +# parameters_patch["read_type"] = "sequential" +# # define patches to be extracted in terms of microns +# parameters_patch["patch_size"] = ["1000m", "1000m"] + +# file_config_temp = write_temp_config_path(parameters_patch) + +# patch_extraction( +# inputDir + "/train_2d_histo_segmentation.csv", +# output_dir_patches_output, +# file_config_temp, +# ) + +# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) +# parameters["patch_size"] = patch_size["2D"] +# parameters["modality"] = "histo" +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 3 +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["model"]["architecture"] = "resunet" +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -2 +# parameters["metrics"] = ["dice"] +# parameters["model"]["onnx_export"] = True +# parameters["model"]["print_summary"] = True +# parameters["data_preprocessing"]["resize_image"] = [128, 128] +# modelDir = os.path.join(outputDir, "modelDir") +# Path(modelDir).mkdir(parents=True, exist_ok=True) +# TrainingManager( +# dataframe=training_data, +# outputDir=modelDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# inference_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_histo_segmentation.csv", train=False +# ) +# inference_data.drop(index=inference_data.index[-1], axis=0, inplace=True) +# InferenceManager( +# dataframe=inference_data, +# modelDir=modelDir, +# 
parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_classification_histology_large_2d(device): +# print( +# "35: Starting histology train/inference classification tests for large images to check exception handling" +# ) +# # overwrite previous results +# sanitize_outputDir() +# output_dir_patches = os.path.join(outputDir, "histo_patches") +# if os.path.isdir(output_dir_patches): +# shutil.rmtree(output_dir_patches) +# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) +# output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") +# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) + +# for sub in ["1", "2"]: +# file_to_check = os.path.join( +# inputDir, "2d_histo_segmentation", sub, "image_resize.tiff" +# ) +# if os.path.exists(file_to_check): +# os.remove(file_to_check) + +# parameters_patch = {} +# # extracting minimal number of patches to ensure that the test does not take too long +# parameters_patch["num_patches"] = 3 +# parameters_patch["patch_size"] = [128, 128] +# parameters_patch["value_map"] = {0: 0, 255: 255} + +# file_config_temp = write_temp_config_path(parameters_patch) + +# patch_extraction( +# inputDir + "/train_2d_histo_classification.csv", +# output_dir_patches_output, +# file_config_temp, +# ) + +# # resize the image +# input_df, _ = parseTrainingCSV( +# inputDir + "/train_2d_histo_classification.csv", train=False +# ) +# files_to_delete = [] + +# def resize_for_ci(filename, scale): +# """ +# Helper function to resize images in CI + +# Args: +# filename (str): Filename of the image to be resized +# scale (float): Scale factor to resize the image + +# Returns: +# str: Filename of the resized image +# """ +# new_filename = filename.replace(".tiff", "_resize.tiff") +# try: +# img = cv2.imread(filename) +# dims = img.shape +# img_resize = cv2.resize(img, (dims[1] * scale, dims[0] * scale)) +# cv2.imwrite(new_filename, img_resize) +# except Exception as ex1: +# # this is only used in CI +# print("Trying vips:", ex1) +# try: +# os.system( +# "vips resize " + filename + " " + new_filename + " " + str(scale) +# ) +# except Exception as ex2: +# print("Resize could not be done:", ex2) +# return new_filename + +# for _, row in input_df.iterrows(): +# # ensure opm mask size check is triggered +# _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) + +# for patch_size in [ +# [128, 128], +# "[100m,100m]", +# "[100mx100m]", +# "[100mX100m]", +# "[100m*100m]", +# ]: +# _ = get_patch_size_in_microns(row["Channel_0"], patch_size) + +# # try to break resizer +# new_filename = resize_for_ci(row["Channel_0"], scale=10) +# row["Channel_0"] = new_filename +# files_to_delete.append(new_filename) +# # we do not need the last subject +# break + +# resized_inference_data_list = os.path.join( +# inputDir, "train_2d_histo_classification_resize.csv" +# ) +# # drop last subject +# input_df.drop(index=input_df.index[-1], axis=0, inplace=True) +# input_df.to_csv(resized_inference_data_list, index=False) +# files_to_delete.append(resized_inference_data_list) + +# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") +# temp_df = pd.read_csv(file_for_Training) +# temp_df.drop("Label", axis=1, inplace=True) +# temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) +# temp_df.to_csv(file_for_Training, index=False) +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", 
version_check_flag=False +# ) +# parameters["modality"] = "histo" +# parameters["patch_size"] = parameters_patch["patch_size"][0] +# file_config_temp = write_temp_config_path(parameters) +# parameters = ConfigManager(file_config_temp, version_check_flag=False) +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "densenet121" +# parameters["model"]["norm_type"] = "none" +# parameters["data_preprocessing"]["rgba2rgb"] = "" +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -2 +# parameters["model"]["print_summary"] = False +# modelDir = os.path.join(outputDir, "modelDir") +# if os.path.isdir(modelDir): +# shutil.rmtree(modelDir) +# Path(modelDir).mkdir(parents=True, exist_ok=True) +# TrainingManager( +# dataframe=training_data, +# outputDir=modelDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# parameters["output_dir"] = modelDir # this is in inference mode +# parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] +# parameters["patch_size"] = [ +# parameters_patch["patch_size"][0] * 10, +# parameters_patch["patch_size"][1] * 10, +# ] +# parameters["nested_training"]["validation"] = 1 +# inference_data, parameters["headers"] = parseTrainingCSV( +# resized_inference_data_list, train=False +# ) +# for model_type in all_model_type: +# parameters["model"]["type"] = model_type +# InferenceManager( +# dataframe=inference_data, +# modelDir=modelDir, +# parameters=parameters, +# device=device, +# ) +# all_folders_in_modelDir = os.listdir(modelDir) +# for folder in all_folders_in_modelDir: +# output_subject_dir = os.path.join(modelDir, folder) +# if os.path.isdir(output_subject_dir): +# # check in the default outputDir that's created - this is based on a unique timestamp +# if folder != "output_validation": +# # if 'predictions.csv' are not found, give error +# assert os.path.exists( +# os.path.join( +# output_subject_dir, +# str(input_df["SubjectID"][0]), +# "predictions.csv", +# ) +# ), "predictions.csv not found" +# # ensure previous results are removed +# sanitize_outputDir() + +# for file in files_to_delete: +# os.remove(file) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_inference_classification_histology_2d(device): +# print("36: Starting histology train/inference classification tests") +# # overwrite previous results +# sanitize_outputDir() +# output_dir_patches = os.path.join(outputDir, "histo_patches") +# if os.path.isdir(output_dir_patches): +# shutil.rmtree(output_dir_patches) +# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) +# output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + +# parameters_patch = {} +# # extracting minimal number of patches to ensure that the test does not take too long +# parameters_patch["patch_size"] = [128, 128] + +# for num_patches in [-1, 3]: +# parameters_patch["num_patches"] = num_patches +# file_config_temp = write_temp_config_path(parameters_patch) + +# if os.path.exists(output_dir_patches_output): +# shutil.rmtree(output_dir_patches_output) +# # this ensures that the output directory for num_patches=3 is preserved +# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) +# patch_extraction( +# inputDir + 
"/train_2d_histo_classification.csv", +# output_dir_patches_output, +# file_config_temp, +# ) + +# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") +# temp_df = pd.read_csv(file_for_Training) +# temp_df.drop("Label", axis=1, inplace=True) +# temp_df["valuetopredict"] = np.random.randint(2, size=6) +# temp_df.to_csv(file_for_Training, index=False) +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "histo" +# parameters["patch_size"] = 128 +# file_config_temp = write_temp_config_path(parameters) +# parameters = ConfigManager(file_config_temp, version_check_flag=False) +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["architecture"] = "densenet121" +# parameters["model"]["norm_type"] = "none" +# parameters["data_preprocessing"]["rgba2rgb"] = "" +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -2 +# parameters["model"]["print_summary"] = False +# modelDir = os.path.join(outputDir, "modelDir") +# if os.path.isdir(modelDir): +# shutil.rmtree(modelDir) +# Path(modelDir).mkdir(parents=True, exist_ok=True) +# TrainingManager( +# dataframe=training_data, +# outputDir=modelDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# parameters["output_dir"] = modelDir # this is in inference mode +# inference_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_histo_classification.csv", train=False +# ) +# for model_type in all_model_type: +# parameters["nested_training"]["testing"] = 1 +# parameters["nested_training"]["validation"] = -2 +# parameters["output_dir"] = modelDir # this is in inference mode +# inference_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_histo_segmentation.csv", train=False +# ) +# parameters["model"]["type"] = model_type +# InferenceManager( +# dataframe=inference_data, +# modelDir=modelDir, +# parameters=parameters, +# device=device, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_unet_layerchange_rad_2d(device): +# # test case to up code coverage --> test decreasing allowed layers for unet +# print("37: Starting 2D Rad segmentation tests for normtype") +# # read and parse csv +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# for model in ["unet_multilayer", "lightunet_multilayer", "unetr"]: +# parameters["model"]["architecture"] = model +# parameters["patch_size"] = [4, 4, 1] +# parameters["model"]["dimension"] = 2 + +# # this assertion should fail +# with pytest.raises(BaseException) as _: +# global_models_dict[parameters["model"]["architecture"]]( +# parameters=parameters +# ) + +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["depth"] = 7 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) 
+# # loop through selected models and train for single epoch +# parameters["model"]["norm_type"] = "batch" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_unetr_rad_3d(device): +# print("38: Testing UNETR for 3D segmentation") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["model"]["architecture"] = "unetr" +# parameters["patch_size"] = [4, 4, 4] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["depth"] = 2 +# parameters["model"]["print_summary"] = False + +# # this assertion should fail +# with pytest.raises(BaseException) as _: +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# parameters["model"]["dimension"] = 3 +# parameters["patch_size"] = [32, 32, 32] + +# with pytest.raises(BaseException) as _: +# parameters["model"]["inner_patch_size"] = 19 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# with pytest.raises(BaseException) as _: +# parameters["model"]["inner_patch_size"] = 64 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# for patch in [16, 8]: +# parameters["model"]["inner_patch_size"] = patch +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = len( +# parameters["headers"]["channelHeaders"] +# ) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# parameters["model"]["norm_type"] = "batch" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_unetr_rad_2d(device): +# print("39: Testing UNETR for 2D segmentation") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["model"]["architecture"] = "unetr" +# parameters["patch_size"] = [128, 128, 1] +# parameters["model"]["dimension"] = 2 + +# for patch in [16, 8]: +# parameters["model"]["inner_patch_size"] = patch +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# parameters["model"]["norm_type"] = "batch" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# 
shutil.rmtree(outputDir) # overwrite previous results +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_transunet_rad_2d(device): +# print("40: Testing TransUNet for 2D segmentation") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# parameters["model"]["architecture"] = "transunet" +# parameters["patch_size"] = [128, 128, 1] +# parameters["model"]["dimension"] = 2 +# parameters["model"]["print_summary"] = False + +# with pytest.raises(BaseException) as _: +# parameters["model"]["num_heads"] = 6 +# parameters["model"]["embed_dim"] = 64 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# with pytest.raises(BaseException) as _: +# parameters["model"]["num_heads"] = 3 +# parameters["model"]["embed_dim"] = 50 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# parameters["model"]["embed_dim"] = 64 +# parameters["model"]["depth"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["num_heads"] = 8 +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 3 +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# parameters["model"]["norm_type"] = "batch" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_transunet_rad_3d(device): +# print("41: Testing TransUNet for 3D segmentation") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["model"]["architecture"] = "transunet" +# parameters["patch_size"] = [4, 4, 4] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["print_summary"] = False + +# # this assertion should fail +# with pytest.raises(BaseException) as _: +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# parameters["model"]["dimension"] = 3 +# parameters["patch_size"] = [32, 32, 32] + +# with pytest.raises(BaseException) as _: +# parameters["model"]["depth"] = 1 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# with pytest.raises(BaseException) as _: +# parameters["model"]["num_heads"] = 6 +# parameters["model"]["embed_dim"] = 64 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# with pytest.raises(BaseException) as _: +# parameters["model"]["num_heads"] = 3 +# parameters["model"]["embed_dim"] = 50 +# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + +# parameters["model"]["num_heads"] = 8 +# parameters["model"]["embed_dim"] = 64 +# parameters["model"]["depth"] = 2 +# parameters["model"]["class_list"] 
= [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# parameters["model"]["norm_type"] = "batch" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# if os.path.isdir(outputDir): +# shutil.rmtree(outputDir) # overwrite previous results +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_train_gradient_clipping_classification_rad_2d(device): +# print("42: Testing gradient clipping") +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_classification.yaml", version_check_flag=False +# ) +# parameters["modality"] = "rad" +# parameters["track_memory_usage"] = True +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# # read and parse csv +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_classification.csv" +# ) +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # ensure gradient clipping is getting tested +# for clip_mode in ["norm", "value", "agc"]: +# parameters["model"]["architecture"] = "imagenet_vgg11" +# parameters["model"]["final_layer"] = "softmax" +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# parameters["clip_mode"] = clip_mode +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# sanitize_outputDir() + +# print("passed") + + +# def test_train_segmentation_unet_conversion_rad_3d(device): +# print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") +# # read and parse csv +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_3d_rad_segmentation.csv" +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 +# parameters["model"]["class_list"] = [0, 1] +# parameters["model"]["final_layer"] = "softmax" +# parameters["model"]["amp"] = True +# parameters["in_memory"] = True +# parameters["verbose"] = False +# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# # loop through selected models and train for single epoch +# for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: +# for converter_type in ["acs", "soft", "conv3d"]: +# parameters["model"]["converter_type"] = converter_type +# parameters["model"]["architecture"] = model +# parameters["nested_training"]["testing"] = -5 +# parameters["nested_training"]["validation"] = -5 +# sanitize_outputDir() +# 
TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_cli_function_configgenerator(): +# print("44: Starting testing cli function for config generator") +# base_config_path = os.path.join(baseConfigDir, "config_all_options.yaml") +# generator_config_path = os.path.join( +# baseConfigDir, "config_generator_sample_strategy.yaml" +# ) +# sanitize_outputDir() +# config_generator(base_config_path, generator_config_path, outputDir) +# all_files = os.listdir(outputDir) +# assert len(all_files) == 72, "config generator did not generate all files" + +# for file in all_files: +# parameters = None +# with suppress_stdout_stderr(): +# parameters = ConfigManager( +# os.path.join(outputDir, file), version_check_flag=False +# ) +# assert parameters, "config generator did not generate valid config files" +# sanitize_outputDir() + +# generator_config = yaml.safe_load(open(generator_config_path, "r")) +# generator_config["second_level_dict_that_should_fail"] = { +# "key_1": {"key_2": "value"} +# } + +# file_config_temp = write_temp_config_path(generator_config) + +# # test for failure +# with pytest.raises(Exception) as exc_info: +# config_generator(base_config_path, file_config_temp, outputDir) +# sanitize_outputDir() + +# print("Exception raised:", exc_info.value) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_cli_function_recoverconfig(): +# print("45: Testing cli function for recover_config") +# # Train, then recover a config and see if it exists/is valid YAML + +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) +# # patch_size is custom for sdnet +# parameters["patch_size"] = [224, 224, 1] +# parameters["batch_size"] = 2 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["num_channels"] = 1 +# parameters["model"]["architecture"] = "sdnet" +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) +# output_config_path = write_temp_config_path(None) +# assert recover_config( +# outputDir, output_config_path +# ), "recover_config returned false" +# assert os.path.exists(output_config_path), "Didn't create a config file" + +# new_params = ConfigManager(output_config_path, version_check_flag=False) +# assert new_params, "Created YAML could not be parsed by ConfigManager" + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_deploy_docker(): +# print("46: Testing deployment of a model to Docker") +# # Train, then try deploying that model (requires an installed Docker engine) + +# deploymentOutputDir = os.path.join(outputDir, "mlcube") +# # read and parse csv +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) +# training_data, parameters["headers"] = parseTrainingCSV( +# inputDir + "/train_2d_rad_segmentation.csv" +# ) + +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# 
parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["num_channels"] = 3 +# parameters["model"]["onnx_export"] = False +# parameters["model"]["print_summary"] = False +# parameters["data_preprocessing"]["resize_image"] = [224, 224] +# parameters["memory_save_mode"] = True + +# parameters = populate_header_in_parameters(parameters, parameters["headers"]) +# sanitize_outputDir() +# TrainingManager( +# dataframe=training_data, +# outputDir=outputDir, +# parameters=parameters, +# device=device, +# resume=False, +# reset=True, +# ) + +# custom_entrypoint = os.path.join( +# gandlfRootDir, +# "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", +# ) +# for entrypoint_script in [None, custom_entrypoint]: +# result = run_deployment( +# os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), +# deploymentOutputDir, +# "docker", +# "model", +# entrypoint_script=entrypoint_script, +# configfile=testingDir + "/config_segmentation.yaml", +# modeldir=outputDir, +# requires_gpu=True, +# ) +# msg = "run_deployment returned false" +# if entrypoint_script: +# msg += " with custom entrypoint script" +# assert result, msg + +# sanitize_outputDir() + +# print("passed") + + +# def test_collision_subjectid_test_segmentation_rad_2d(device): +# print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") +# parameters = ConfigManager( +# testingDir + "/config_segmentation.yaml", version_check_flag=False +# ) + +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["num_epochs"] = 1 +# parameters["nested_training"]["testing"] = 1 +# parameters["model"]["dimension"] = 2 +# parameters["model"]["class_list"] = [0, 255] +# parameters["model"]["amp"] = True +# parameters["model"]["print_summary"] = False +# parameters["model"]["num_channels"] = 3 +# parameters["metrics"] = ["dice"] +# parameters["model"]["architecture"] = "unet" +# outputDir = os.path.join(testingDir, "data_output") + +# file_config_temp = write_temp_config_path(parameters) + +# # test the case where outputDir is explicitly provided to InferenceManager +# train_data_path = inputDir + "/train_2d_rad_segmentation.csv" +# test_data_path = inputDir + "/test_2d_rad_segmentation.csv" +# df = pd.read_csv(train_data_path) +# temp_df = pd.read_csv(train_data_path) +# # Concatenate the two dataframes +# df = pd.concat([df, temp_df], ignore_index=True) + +# df.to_csv(test_data_path, index=False) +# _, testing_data, _ = parseTestingCSV(test_data_path, outputDir) +# # Save testing data to a csv file +# testing_data.to_csv(test_data_path, index=False) + +# main_run( +# train_data_path + "," + train_data_path + "," + test_data_path, +# file_config_temp, +# outputDir, +# False, +# device, +# resume=False, +# reset=True, +# ) + +# sanitize_outputDir() + +# print("passed") + + +# def test_generic_random_numbers_are_deterministic_on_cpu(): +# print("48: Starting testing deterministic random numbers generation") + +# set_determinism(seed=42) +# a, b = np.random.rand(3, 3), np.random.rand(3, 3) + +# set_determinism(seed=42) +# c, d = np.random.rand(3, 3), np.random.rand(3, 3) + +# # Check that the generated random numbers are the same with numpy +# assert np.allclose(a, c) +# assert np.allclose(b, d) + +# e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + +# set_determinism(seed=42) +# g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + +# # 
Check that the generated random numbers are the same with Python's built-in random module +# assert e == g +# assert f == h + +# print("passed") + + +# def test_generic_cli_function_metrics_cli_rad_nd(): +# print("49: Starting metric calculation tests") +# for dim in ["2d", "3d"]: +# for problem_type in ["segmentation", "classification", "synthesis"]: +# synthesis_detected = problem_type == "synthesis" +# problem_type_wrap = problem_type +# if synthesis_detected: +# problem_type_wrap = "classification" +# # read and parse csv +# training_data, _ = parseTrainingCSV( +# inputDir + f"/train_{dim}_rad_{problem_type_wrap}.csv" +# ) +# if problem_type_wrap == "segmentation": +# labels_array = training_data["Label"] +# elif synthesis_detected: +# labels_array = training_data["Channel_0"] +# else: +# labels_array = training_data["ValueToPredict"] +# training_data["target"] = labels_array +# training_data["prediction"] = labels_array +# if synthesis_detected: +# # this optional +# training_data["mask"] = training_data["Label"] + +# # read and initialize parameters for specific data dimension +# parameters = ConfigManager( +# testingDir + f"/config_{problem_type_wrap}.yaml", +# version_check_flag=False, +# ) +# parameters["modality"] = "rad" +# parameters["patch_size"] = patch_size["2D"] +# parameters["model"]["dimension"] = 2 +# if dim == "3d": +# parameters["patch_size"] = patch_size["3D"] +# parameters["model"]["dimension"] = 3 + +# parameters["verbose"] = False +# if synthesis_detected: +# parameters["problem_type"] = problem_type + +# temp_infer_csv = os.path.join(outputDir, "temp_csv.csv") +# training_data.to_csv(temp_infer_csv, index=False) + +# output_file = os.path.join(outputDir, "output.yaml") + +# temp_config = write_temp_config_path(parameters) + +# # run the metrics calculation +# generate_metrics_dict(temp_infer_csv, temp_config, output_file) + +# assert os.path.isfile(output_file), "Metrics output file was not generated" + +# sanitize_outputDir() + + +# def test_generic_deploy_metrics_docker(): +# print("50: Testing deployment of a metrics generator to Docker") +# # requires an installed Docker engine + +# deploymentOutputDir = os.path.join(outputDir, "mlcube") + +# result = run_deployment( +# os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), +# deploymentOutputDir, +# "docker", +# "metrics", +# ) - sanitize_outputDir() - - print("passed") - - -def test_train_scheduler_classification_rad_2d(device): - print("17: Starting 2D Rad segmentation tests for scheduler") - # read and initialize parameters for specific data dimension - # loop through selected models and train for single epoch - for scheduler in global_schedulers_dict: - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "densenet121" - parameters["model"]["norm_type"] = "instance" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters["scheduler"] = {} - parameters["scheduler"]["type"] = scheduler - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() 
- ## ensure parameters are parsed every single time - file_config_temp = write_temp_config_path(parameters) - - parameters = ConfigManager(file_config_temp, version_check_flag=False) - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_optimizer_classification_rad_2d(device): - print("18: Starting 2D Rad classification tests for optimizer") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "densenet121" - parameters["model"]["norm_type"] = "none" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for optimizer in global_optimizer_dict: - parameters["optimizer"] = {} - parameters["optimizer"]["type"] = optimizer - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.exists(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - Path(outputDir).mkdir(parents=True, exist_ok=True) - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_clip_train_classification_rad_3d(device): - print("19: Starting 3D Rad classification tests for clipping") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["architecture"] = "vgg16" - parameters["model"]["norm_type"] = "None" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for clip_mode in all_clip_modes: - parameters["clip_mode"] = clip_mode - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - sanitize_outputDir() - - print("passed") +# assert result, "run_deployment returned false" +# sanitize_outputDir() +# print("passed") -def test_train_normtype_segmentation_rad_3d(device): - print("20: Starting 3D Rad segmentation tests for normtype") - # read and initialize parameters for specific data dimension - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = 
ConfigManager(
-        testingDir + "/config_segmentation.yaml", version_check_flag=False
-    )
-    training_data, parameters["headers"] = parseTrainingCSV(
-        inputDir + "/train_3d_rad_segmentation.csv"
-    )
-    parameters["patch_size"] = patch_size["3D"]
-    parameters["model"]["dimension"] = 3
-    parameters["model"]["class_list"] = [0, 1]
-    parameters["model"]["amp"] = True
-    parameters["save_output"] = True
-    parameters["data_postprocessing"] = {"fill_holes"}
-    parameters["in_memory"] = True
-    parameters["model"]["onnx_export"] = False
-    parameters["model"]["print_summary"] = False
-    parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"])
-    parameters = populate_header_in_parameters(parameters, parameters["headers"])
-
-    # these should raise exceptions
-    for norm_type in ["none", None]:
-        parameters["model"]["norm_type"] = norm_type
-        file_config_temp = write_temp_config_path(parameters)
-        with pytest.raises(Exception) as exc_info:
-            parameters = ConfigManager(file_config_temp, version_check_flag=False)
-
-        print("Exception raised:", exc_info.value)
-
-    # loop through selected models and train for single epoch
-    for norm in all_norm_types:
-        for model in ["resunet", "unet", "fcn", "unetr"]:
-            parameters["model"]["architecture"] = model
-            parameters["model"]["norm_type"] = norm
-            parameters["nested_training"]["testing"] = -5
-            parameters["nested_training"]["validation"] = -5
-            if os.path.isdir(outputDir):
-                shutil.rmtree(outputDir)  # overwrite previous results
-            Path(outputDir).mkdir(parents=True, exist_ok=True)
-            TrainingManager(
-                dataframe=training_data,
-                outputDir=outputDir,
-                parameters=parameters,
-                device=device,
-                resume=False,
-                reset=True,
-            )
-
-    sanitize_outputDir()

+# def test_generic_data_split():
+#     print("51: Starting test for splitting and saving CSVs")
+#     # read and initialize parameters for specific data dimension
+#     parameters = ConfigManager(
+#         testingDir + "/config_classification.yaml", version_check_flag=False
+#     )
+#     parameters["nested_training"] = {"testing": 5, "validation": 5, "stratified": True}
+#     # read and parse csv
+#     training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_classification.csv")
+#     # duplicate the data to test stratified sampling
+#     training_data_duplicate = training_data._append(training_data)
+#     for _ in range(1):
+#         training_data_duplicate = training_data_duplicate._append(
+#             training_data_duplicate
+#         )
+#     training_data_duplicate.reset_index(drop=True, inplace=True)
+#     # ensure subjects are not duplicated
+#     training_data_duplicate["SubjectID"] = training_data_duplicate.index
+
+#     sanitize_outputDir()
+
+#     split_data_and_save_csvs(training_data_duplicate, outputDir, parameters)
+
+#     files_in_outputDir = os.listdir(outputDir)
+#     assert len(files_in_outputDir) == 15, "CSVs were not split correctly"
+
+#     sanitize_outputDir()
+
+#     print("passed")
+
+
+def test_gandlf_logging():
+    # the setup function expects a logger name (see gandlf_logger_setup in GANDLF/utils)
+    gandlf_logger_setup(__name__)
+    message = "Testing logging"
+
+    logging.info(message)
+
+    # the rotating file handler in logging_config.yaml writes to tmp/gandlf/gandlf.log
+    with open("tmp/gandlf/gandlf.log", "r") as log_file:
+        logs = log_file.read()
+        assert message in logs, "Message not found in the log file"
+    print("passed")

-def test_train_metrics_segmentation_rad_2d(device):
-    print("21: Starting 2D Rad segmentation tests for metrics")
-    # read and parse csv
-    parameters = ConfigManager(
-        testingDir + "/config_segmentation.yaml", version_check_flag=False
-    )
-    parameters["modality"] = "rad"
-    parameters["patch_size"] = patch_size["2D"]
-    parameters["model"]["dimension"] = 2
-    parameters["model"]["class_list"] = [0, 255]
-    parameters["data_postprocessing"] = 
{"mapping": {0: 0, 255: 1}} - parameters["model"]["amp"] = True - parameters["save_output"] = True - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = [ - "dice", - "hausdorff", - "hausdorff95", - "normalized_surface_dice", - "sensitivity", - "sensitivity_per_label", - "specificity_segmentation", - "specificity_segmentation_per_label", - "jaccard", - "jaccard_per_label", - ] - parameters["model"]["architecture"] = "resunet" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - file_config_temp = write_temp_config_path(parameters) - - parameters = ConfigManager(file_config_temp, version_check_flag=False) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_metrics_regression_rad_2d(device): - print("22: Starting 2D Rad regression tests for metrics") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_regression.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_regression.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["norm_type"] = "instance" - parameters["model"]["amp"] = False - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "vgg11" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = True - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_losses_segmentation_rad_2d(device): - print("23: Starting 2D Rad segmentation tests for losses") - - # healper function to read and parse yaml and return parameters - def get_parameters_after_alteration(loss_type: str) -> dict: - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - parameters["loss_function"] = loss_type - file_config_temp = write_temp_config_path(parameters) - # read and parse csv - parameters = ConfigManager(file_config_temp, version_check_flag=True) - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - # disabling amp because some losses do not support Half, yet - parameters["model"]["amp"] = False - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "resunet" - parameters["metrics"] = ["dice"] - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - return parameters, training_data - - # loop through selected models and train for single epoch - 
for loss_type in [ - "dc", - "dc_log", - "dcce", - "dcce_logits", - "tversky", - "focal", - "dc_focal", - "mcc", - "mcc_log", - ]: - parameters, training_data = get_parameters_after_alteration(loss_type) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - -def test_generic_config_read(): - print("24: Starting testing reading configuration") - parameters = ConfigManager( - os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False - ) - parameters["data_preprocessing"]["resize_image"] = [128, 128] - file_config_temp = write_temp_config_path(parameters) - - # read and parse csv - parameters = ConfigManager(file_config_temp, version_check_flag=True) - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") - assert data_loader is not None, "data_loader is None" - - os.remove(file_config_temp) - - # ensure resize_image is triggered - parameters["data_preprocessing"].pop("resample") - parameters["data_preprocessing"].pop("resample_min") - parameters["data_preprocessing"]["resize_image"] = [128, 128] - parameters["model"]["print_summary"] = False - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) - - parameters = ConfigManager(file_config_temp, version_check_flag=True) - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") - assert data_loader is not None, "data_loader is None" - - os.remove(file_config_temp) - - # ensure resize_patch is triggered - parameters["data_preprocessing"].pop("resize_image") - parameters["data_preprocessing"]["resize_patch"] = [64, 64] - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) - - parameters = ConfigManager(file_config_temp, version_check_flag=True) - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") - assert data_loader is not None, "data_loader is None" - - os.remove(file_config_temp) - - # ensure resize_image is triggered - parameters["data_preprocessing"].pop("resize_patch") - parameters["data_preprocessing"]["resize"] = [64, 64] - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) - - parameters = ConfigManager(file_config_temp, version_check_flag=True) - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - assert parameters is not None, "parameters is None" - data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") - assert data_loader is not None, "data_loader is None" - - os.remove(file_config_temp) - - sanitize_outputDir() - - print("passed") - - -def test_generic_cli_function_preprocess(): - print("25: Starting testing cli function preprocess") - file_config = os.path.join(testingDir, "config_segmentation.yaml") - sanitize_outputDir() - file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") - - input_data_df, _ = parseTrainingCSV(file_data, 
train=False) - # add random metadata to ensure it gets preserved - input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) - input_data_df["metadata_test_int"] = np.random.randint( - 0, 100, input_data_df.shape[0] - ) - temp_csv = os.path.join(outputDir, "temp.csv") - input_data_df.to_csv(temp_csv) - - parameters = ConfigManager(file_config) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = "[0, 255||125]" - # disabling amp because some losses do not support Half, yet - parameters["model"]["amp"] = False - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "unet" - parameters["metrics"] = ["dice"] - parameters["patch_sampler"] = { - "type": "label", - "enable_padding": True, - "biased_sampling": True, - } - parameters["weighted_loss"] = True - parameters["save_output"] = True - parameters["data_preprocessing"]["to_canonical"] = None - parameters["data_preprocessing"]["rgba_to_rgb"] = None - - file_config_temp = write_temp_config_path(parameters) - - preprocess_and_save(temp_csv, file_config_temp, outputDir) - training_data, parameters["headers"] = parseTrainingCSV( - outputDir + "/data_processed.csv" - ) - - # check that the length of training data is what we expect - assert ( - len(training_data) == input_data_df.shape[0] - ), "Number of subjects in dataframe is not same as that of input dataframe" - assert ( - len(training_data.columns) == len(input_data_df.columns) + 1 - ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column - sanitize_outputDir() - - ## regression/classification preprocess - file_config = os.path.join(testingDir, "config_regression.yaml") - parameters = ConfigManager(file_config) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["amp"] = False - # read and parse csv - parameters["model"]["num_channels"] = 3 - parameters["scaling_factor"] = 1 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters["data_preprocessing"]["to_canonical"] = None - parameters["data_preprocessing"]["rgba_to_rgb"] = None - file_data = os.path.join(inputDir, "train_2d_rad_regression.csv") - input_data_df, _ = parseTrainingCSV(file_data, train=False) - # add random metadata to ensure it gets preserved - input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] - input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) - input_data_df["metadata_test_int"] = np.random.randint( - 0, 100, input_data_df.shape[0] - ) - input_data_df.to_csv(temp_csv) - - # store this separately for preprocess testing - with open(file_config_temp, "w") as outfile: - yaml.dump(parameters, outfile, default_flow_style=False) - - preprocess_and_save(temp_csv, file_config_temp, outputDir) - training_data, parameters["headers"] = parseTrainingCSV( - outputDir + "/data_processed.csv" - ) - - # check that the length of training data is what we expect - assert ( - len(training_data) == input_data_df.shape[0] - ), "Number of subjects in dataframe is not same as that of input dataframe" - assert ( - len(training_data.columns) == len(input_data_df.columns) + 1 - ), "Number of columns in output dataframe is not same as 
that of input dataframe" # the +1 is for the added index column - sanitize_outputDir() - - print("passed") - - -def test_generic_cli_function_mainrun(device): - print("26: Starting testing cli function main_run") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["num_epochs"] = 1 - parameters["nested_training"]["testing"] = 1 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = ["dice"] - parameters["model"]["architecture"] = "unet" - - file_config_temp = write_temp_config_path(parameters) - - file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") - - main_run( - file_data, file_config_temp, outputDir, True, device, resume=False, reset=True - ) - sanitize_outputDir() - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) - - # testing train/valid split - main_run( - file_data + "," + file_data, - file_config_temp, - outputDir, - True, - device, - resume=False, - reset=True, - ) - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) - - # testing train/valid/test split with resume - main_run( - file_data + "," + file_data + "," + file_data, - file_config_temp, - outputDir, - True, - device, - resume=True, - reset=False, - ) - sanitize_outputDir() - - print("passed") - - -def test_dataloader_construction_train_segmentation_3d(device): - print("27: Starting 3D Rad segmentation tests") - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - params_all_preprocessing_and_augs = ConfigManager( - os.path.join(baseConfigDir, "config_all_options.yaml") - ) - - # take preprocessing and augmentations from all options - for key in ["data_preprocessing", "data_augmentation"]: - parameters[key] = params_all_preprocessing_and_augs[key] - - # customize parameters to maximize test coverage - parameters["data_preprocessing"].pop("normalize", None) - parameters["data_preprocessing"]["normalize_nonZero"] = None - parameters["data_preprocessing"]["default_probability"] = 1 - parameters.pop("nested_training", None) - parameters["nested_training"] = {} - parameters["nested_training"]["testing"] = 1 - parameters["nested_training"]["validation"] = -5 - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["save_training"] = True - parameters["save_output"] = True - parameters["model"]["dimension"] = 3 - parameters["model"]["class_list"] = [0, 1] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["architecture"] = "unet" - parameters["weighted_loss"] = False - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} - parameters["data_postprocessing"]["fill_holes"] = True - parameters["data_postprocessing"]["cca"] = True - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - 
sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - -def test_generic_preprocess_functions(): - print("28: Starting testing preprocessing functions") - # initialize an input which has values between [-1,1] - # checking tensor with last dimension of size 1 - input_tensor = torch.rand(4, 256, 256, 1) - input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) - assert input_transformed.shape[0] == 3, "Number of channels is not 3" - assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" - - input_tensor = torch.rand(3, 256, 256, 1) - input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) - assert input_transformed.shape[0] == 4, "Number of channels is not 4" - assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" - - input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 - input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) - input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 - input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) - input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) - input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) - parameters_dict = {} - parameters_dict["min"] = 0.25 - parameters_dict["max"] = 0.75 - input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( - input_tensor - ) - assert ( - torch.count_nonzero( - input_transformed[input_transformed < parameters_dict["min"]] - > parameters_dict["max"] - ) - == 0 - ), "Input should be thresholded" - - input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) - assert ( - torch.count_nonzero( - input_transformed[input_transformed < parameters_dict["min"]] - > parameters_dict["max"] - ) - == 0 - ), "Input should be clipped" - - non_zero_normalizer = global_preprocessing_dict["normalize_nonZero_masked"] - input_transformed = non_zero_normalizer(input_tensor) - non_zero_normalizer = global_preprocessing_dict["normalize_positive"] - input_transformed = non_zero_normalizer(input_tensor) - non_zero_normalizer = global_preprocessing_dict["normalize_nonZero"] - input_transformed = non_zero_normalizer(input_tensor) - - ## stain_normalization checks - input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 - training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") - parameters_temp = {} - parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["stain_normalizer"] = { - "target": training_data["Channel_0"][0] - } - for extractor in ["ruifrok", "macenko", "vahadane"]: - parameters_temp["data_preprocessing"]["stain_normalizer"][ - "extractor" - ] = extractor - non_zero_normalizer = global_preprocessing_dict["stain_normalizer"]( - parameters_temp["data_preprocessing"]["stain_normalizer"] - ) - input_transformed = non_zero_normalizer(input_tensor) - - ## histogram matching tests - # histogram equalization - input_tensor = torch.rand(1, 64, 64, 64) - parameters_temp = {} - parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["histogram_matching"] = {} - non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( - parameters_temp["data_preprocessing"]["histogram_matching"] - ) - input_transformed = non_zero_normalizer(input_tensor) - # adaptive 
histogram equalization - parameters_temp = {} - parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} - non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( - parameters_temp["data_preprocessing"]["histogram_matching"] - ) - input_transformed = non_zero_normalizer(input_tensor) - # histogram matching - training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") - parameters_temp = {} - parameters_temp["data_preprocessing"] = {} - parameters_temp["data_preprocessing"]["histogram_matching"] = { - "target": training_data["Channel_0"][0] - } - non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( - parameters_temp["data_preprocessing"]["histogram_matching"] - ) - input_transformed = non_zero_normalizer(input_tensor) - - # fill holes - input_tensor = torch.rand(1, 256, 256, 256) > 0.5 - input_transformed = fill_holes(input_tensor) - - ## CCA tests - # 3d - input_tensor = torch.rand(1, 256, 256, 256) > 0.5 - input_transformed = cca(input_tensor) - # 2d - input_tensor = torch.rand(1, 256, 256) > 0.5 - input_transformed = cca(input_tensor) - # 2d rgb - input_tensor = torch.rand(1, 3, 256, 256) > 0.5 - input_transformed = cca(input_tensor) - - input_tensor = torch.rand(1, 256, 256, 256) - cropper = global_preprocessing_dict["crop_external_zero_planes"]( - patch_size=[128, 128, 128] - ) - input_transformed = cropper(input_tensor) - - cropper = global_preprocessing_dict["crop"]([64, 64, 64]) - input_transformed = cropper(input_tensor) - assert input_transformed.shape == (1, 128, 128, 128), "Cropping should work" - - cropper = global_preprocessing_dict["centercrop"]([128, 128, 128]) - input_transformed = cropper(input_tensor) - assert input_transformed.shape == (1, 128, 128, 128), "Center-crop should work" - - # test pure morphological operations - input_tensor_3d = torch.rand(1, 1, 256, 256, 256) - input_tensor_2d = torch.rand(1, 3, 256, 256) - for mode in ["dilation", "erosion", "opening", "closing"]: - input_transformed_3d = torch_morphological(input_tensor_3d, mode=mode) - assert len(input_transformed_3d.shape) == 5, "Output should be 5D" - input_transformed_2d = torch_morphological(input_tensor_2d, mode=mode) - assert len(input_transformed_2d.shape) == 4, "Output should be 4D" - - # test for failure - with pytest.raises(Exception) as exc_info: - input_tensor_4d = torch.rand(1, 1, 32, 32, 32, 32) - input_transformed_3d = torch_morphological(input_tensor_4d) - - print("Exception raised:", exc_info.value) - - # test obtaining arrays - input_tensor_3d = torch.rand(256, 256, 256) - input_array = get_array_from_image_or_tensor(input_tensor_3d) - assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" - input_image = sitk.GetImageFromArray(input_array) - input_array = get_array_from_image_or_tensor(input_image) - assert isinstance(input_array, np.ndarray), "Array should be obtained from image" - input_array = get_array_from_image_or_tensor(input_array) - assert isinstance(input_array, np.ndarray), "Array should be obtained from array" - - with pytest.raises(Exception) as exc_info: - input_list = [0, 1] - input_array = get_array_from_image_or_tensor(input_list) - exception_raised = exc_info.value - print("Exception raised: ", exception_raised) - - ## image rescaling test - input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) - # try out different options - for params in [ - {}, - None, - {"in_min_max": [5, 250], "out_min_max": [-1, 2]}, - 
{"out_min_max": [0, 1], "percentiles": [5, 95]}, - ]: - rescaler = global_preprocessing_dict["rescale"](params) - input_transformed = rescaler(input_tensor) - assert ( - input_transformed.min() >= rescaler.out_min_max[0] - ), "Rescaling should work for min" - assert ( - input_transformed.max() <= rescaler.out_min_max[1] - ), "Rescaling should work for max" - - # tests for histology alpha check - input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) - _ = get_nonzero_percent(input_tensor) - assert not ( - alpha_rgb_2d_channel_check(input_tensor) - ), "Alpha channel check should work for 4D tensors" - input_tensor = torch.randint(0, 256, (64, 64, 64)) - assert not ( - alpha_rgb_2d_channel_check(input_tensor) - ), "Alpha channel check should work for 3D images" - input_tensor = torch.randint(0, 256, (64, 64, 4)) - assert not ( - alpha_rgb_2d_channel_check(input_tensor) - ), "Alpha channel check should work for generic 4D images" - input_tensor = torch.randint(0, 256, (64, 64)) - assert alpha_rgb_2d_channel_check( - input_tensor - ), "Alpha channel check should work for grayscale 2D images" - input_tensor = torch.randint(0, 256, (64, 64, 3)) - assert alpha_rgb_2d_channel_check( - input_tensor - ), "Alpha channel check should work for RGB images" - input_tensor = torch.randint(0, 256, (64, 64, 4)) - input_tensor[:, :, 3] = 255 - assert alpha_rgb_2d_channel_check( - input_tensor - ), "Alpha channel check should work for RGBA images" - input_array = torch.randint(0, 256, (64, 64, 3)).numpy() - temp_filename = os.path.join(outputDir, "temp.png") - cv2.imwrite(temp_filename, input_array) - temp_filename_tiff = convert_to_tiff(temp_filename, outputDir) - assert os.path.exists(temp_filename_tiff), "Tiff file should be created" - - # resize tests - input_tensor = np.random.randint(0, 255, size=(20, 20, 20)) - input_image = sitk.GetImageFromArray(input_tensor) - expected_output = (10, 10, 10) - input_transformed = resize_image(input_image, expected_output) - assert input_transformed.GetSize() == expected_output, "Resize should work" - input_tensor = np.random.randint(0, 255, size=(20, 20)) - input_image = sitk.GetImageFromArray(input_tensor) - expected_output = [10, 10] - output_size_dict = {"resize": expected_output} - input_transformed = resize_image(input_image, output_size_dict) - assert list(input_transformed.GetSize()) == expected_output, "Resize should work" - - sanitize_outputDir() - - print("passed") - - -def test_generic_augmentation_functions(): - print("29: Starting testing augmentation functions") - params_all_preprocessing_and_augs = ConfigManager( - os.path.join(baseConfigDir, "config_all_options.yaml") - ) - - # this is for rgb augmentation - input_tensor = torch.rand(3, 128, 128, 1) - temp = global_augs_dict["colorjitter"]( - params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] - ) - output_tensor = None - output_tensor = temp(input_tensor) - assert output_tensor != None, "RGB Augmentation should work" - - # ensuring all code paths are covered - for key in ["brightness", "contrast", "saturation", "hue"]: - params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"][ - key - ] = 0.25 - temp = global_augs_dict["colorjitter"]( - params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] - ) - output_tensor = None - output_tensor = temp(input_tensor) - assert output_tensor != None, "RGB Augmentation should work" - - # testing HED transforms with different options - input_tensor = torch.rand(3, 128, 128, 1) - params = { - "data_augmentation": { - 
"hed_transform": {}, - # "hed_transform_light": {}, - # "hed_transform_heavy": {}, - } - } - temp = global_augs_dict["hed_transform"]( - params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] - ) - ranges = [ - "haematoxylin_bias_range", - "eosin_bias_range", - "dab_bias_range", - "haematoxylin_sigma_range", - "eosin_sigma_range", - "dab_sigma_range", - ] - - default_range = [-0.1, 0.1] - for key in ranges: - params["data_augmentation"]["hed_transform"].setdefault(key, default_range) - - params["data_augmentation"]["hed_transform"].setdefault( - "cutoff_range", [0.05, 0.95] - ) - - # Check if the params are correctly set for each augmentation type - assert params["data_augmentation"]["hed_transform"] == { - "haematoxylin_bias_range": [-0.1, 0.1], - "eosin_bias_range": [-0.1, 0.1], - "dab_bias_range": [-0.1, 0.1], - "haematoxylin_sigma_range": [-0.1, 0.1], - "eosin_sigma_range": [-0.1, 0.1], - "dab_sigma_range": [-0.1, 0.1], - "cutoff_range": [0.05, 0.95], - } - temp = global_augs_dict["hed_transform"]( - params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] - ) - output_tensor = None - output_tensor = temp(input_tensor) - assert output_tensor != None, "HED Augmentation should work" - - # this is for all other augmentations - input_tensor = torch.rand(3, 128, 128, 128) - for aug in params_all_preprocessing_and_augs["data_augmentation"]: - aug_lower = aug.lower() - output_tensor = None - if aug_lower in global_augs_dict: - output_tensor = global_augs_dict[aug]( - params_all_preprocessing_and_augs["data_augmentation"][aug_lower] - )(input_tensor) - assert output_tensor != None, "Augmentation should work" - - # additional test for elastic - params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] - for key_to_pop in ["num_control_points", "max_displacement", "locked_borders"]: - params_elastic.pop(key_to_pop, None) - output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) - assert output_tensor != None, "Augmentation for base elastic transform should work" - - sanitize_outputDir() - - print("passed") - - -def test_train_checkpointing_segmentation_rad_2d(device): - print("30: Starting 2D Rad segmentation tests for metrics") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - parameters["patch_sampler"] = { - "type": "label", - "enable_padding": True, - "biased_sampling": True, - } - file_config_temp = write_temp_config_path(parameters) - parameters = ConfigManager(file_config_temp, version_check_flag=False) - - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["num_epochs"] = 1 - parameters["nested_training"]["testing"] = 1 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = [ - "dice", - "dice_per_label", - "hausdorff", - "hausdorff95", - "hd95_per_label", - "hd100_per_label", - "normalized_surface_dice", - "normalized_surface_dice_per_label", - "sensitivity", - "sensitivity_per_label", - "specificity_segmentation", - "specificity_segmentation_per_label", - "jaccard", - "jaccard_per_label", - ] - parameters["model"]["architecture"] = "unet" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = 
populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - parameters["num_epochs"] = 2 - parameters["nested_training"]["validation"] = -2 - parameters["nested_training"]["testing"] = 1 - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=False, - ) - - sanitize_outputDir() - - print("passed") - - -def test_generic_model_patch_divisibility(): - print("31: Starting patch divisibility tests") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - _, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["model"]["architecture"] = "unet" - parameters["patch_size"] = [127, 127, 1] - parameters["num_epochs"] = 1 - parameters["nested_training"]["testing"] = 1 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = ["dice"] - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - - # this assertion should fail - with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - parameters["model"]["architecture"] = "uinc" - parameters["model"]["base_filters"] = 11 - - # this assertion should fail - with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - sanitize_outputDir() - - print("passed") - - -def test_generic_one_hot_logic(): - print("32: Starting one hot logic tests") - random_array = np.random.randint(5, size=(20, 20, 20)) - img = sitk.GetImageFromArray(random_array) - img_tensor = get_tensor_from_image(img).to(torch.float16) - img_tensor = img_tensor.unsqueeze(0).unsqueeze(0) - - class_list = [*range(0, np.max(random_array) + 1)] - img_tensor_oh = one_hot(img_tensor, class_list) - img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) - comparison = random_array == img_tensor_oh_rev_array - assert comparison.all(), "Arrays are not equal" - - class_list = ["0", "1||2||3", np.max(random_array)] - img_tensor_oh = one_hot(img_tensor, class_list) - img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) - - # check for background - comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0) - assert comparison.all(), "Arrays at '0' are not equal" - - # check last foreground - comparison = (random_array == np.max(random_array)) == ( - img_tensor_oh_rev_array == len(class_list) - 1 - ) - assert comparison.all(), "Arrays at final foreground are not equal" - - # check combined foreground - combined_array = np.logical_or( - np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3) - ) - comparison = combined_array == (img_tensor_oh_rev_array == 1) - assert comparison.all(), "Arrays at the combined foreground are not equal" - - parameters = {"data_postprocessing": {}} - mapped_output = get_mapped_label( - torch.from_numpy(img_tensor_oh_rev_array), parameters - ) - - parameters = {} - mapped_output = get_mapped_label( - torch.from_numpy(img_tensor_oh_rev_array), parameters - ) - - parameters = {"data_postprocessing": {"mapping": {0: 0, 1: 1, 2: 5}}} - 
-    mapped_output = get_mapped_label(
-        torch.from_numpy(img_tensor_oh_rev_array), parameters
-    )
-
-    for key, value in parameters["data_postprocessing"]["mapping"].items():
-        comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value)
-        assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value)
-
-    # check the case where 0 is present as an int in a special case
-    class_list = [0, "1||2||3", np.max(random_array)]
-    img_tensor_oh = one_hot(img_tensor, class_list)
-    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
-
-    # check for background
-    comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0)
-    assert comparison.all(), "Arrays at '0' are not equal"
-
-    # check the case where 0 is absent from class_list
-    class_list = ["1||2||3", np.max(random_array)]
-    img_tensor_oh = one_hot(img_tensor, class_list)
-    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
-
-    # check last foreground
-    comparison = (random_array == np.max(random_array)) == (
-        img_tensor_oh_rev_array == len(class_list)
-    )
-    assert comparison.all(), "Arrays at final foreground are not equal"
-
-    # check combined foreground
-    combined_array = np.logical_or(
-        np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3)
-    )
-    comparison = combined_array == (img_tensor_oh_rev_array == 1)
-    assert comparison.all(), "Arrays at the combined foreground are not equal"
-
-    sanitize_outputDir()
-
-    print("passed")
-
-
-def test_generic_anonymizer():
-    print("33: Starting anonymizer tests")
-    input_file = get_testdata_file("MR_small.dcm")
-
-    output_file = os.path.join(outputDir, "MR_small_anonymized.dcm")
-
-    config_file = os.path.join(baseConfigDir, "config_anonymizer.yaml")
-
-    run_anonymizer(input_file, output_file, config_file, "rad")
-    assert os.path.exists(output_file), "Anonymized file does not exist"
-
-    # test defaults
-    run_anonymizer(input_file, output_file, None, "rad")
-    assert os.path.exists(output_file), "Anonymized file does not exist"
-
-    # test nifti conversion
-    config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml")
-    with open(config_file, "r") as file_data:
-        yaml_data = file_data.read()
-    parameters = yaml.safe_load(yaml_data)
-    parameters["convert_to_nifti"] = True
-    with open(config_file_for_nifti, "w") as file:
-        yaml.dump(parameters, file)
-
-    # for nifti conversion, the input needs to be in a dir
-    input_folder_for_nifti = os.path.join(outputDir, "nifti_input")
-    Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True)
-    shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm"))
-
-    output_file = os.path.join(outputDir, "MR_small.nii.gz")
-
-    run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad")
-    assert os.path.exists(output_file), "Anonymized file does not exist"
-
-    if not os.path.exists(output_file):
-        raise Exception("Output NIfTI file was not created")
-
-    input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff")
-    output_file_histo = os.path.join(outputDir, "histo_anon.tiff")
-    # this assertion should fail since histo anonymizer is not implemented
-    with pytest.raises(BaseException) as exc_info:
-        run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo")
-        assert os.path.exists(output_file_histo), "Anonymized file does not exist"
-    print("Exception raised: ", exc_info.value)
-    sanitize_outputDir()
-
-    print("passed")
-
-
-def test_train_inference_segmentation_histology_2d(device):
print("34: Starting histology train/inference segmentation tests") - # overwrite previous results - sanitize_outputDir() - output_dir_patches = os.path.join(outputDir, "histo_patches") - if os.path.isdir(output_dir_patches): - shutil.rmtree(output_dir_patches) - Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") - Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - - parameters_patch = {} - # extracting minimal number of patches to ensure that the test does not take too long - parameters_patch["num_patches"] = 10 - parameters_patch["read_type"] = "sequential" - # define patches to be extracted in terms of microns - parameters_patch["patch_size"] = ["1000m", "1000m"] - - file_config_temp = write_temp_config_path(parameters_patch) - - patch_extraction( - inputDir + "/train_2d_histo_segmentation.csv", - output_dir_patches_output, - file_config_temp, - ) - - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) - parameters["patch_size"] = patch_size["2D"] - parameters["modality"] = "histo" - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["model"]["architecture"] = "resunet" - parameters["nested_training"]["testing"] = 1 - parameters["nested_training"]["validation"] = -2 - parameters["metrics"] = ["dice"] - parameters["model"]["onnx_export"] = True - parameters["model"]["print_summary"] = True - parameters["data_preprocessing"]["resize_image"] = [128, 128] - modelDir = os.path.join(outputDir, "modelDir") - Path(modelDir).mkdir(parents=True, exist_ok=True) - TrainingManager( - dataframe=training_data, - outputDir=modelDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - inference_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_histo_segmentation.csv", train=False - ) - inference_data.drop(index=inference_data.index[-1], axis=0, inplace=True) - InferenceManager( - dataframe=inference_data, - modelDir=modelDir, - parameters=parameters, - device=device, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_inference_classification_histology_large_2d(device): - print( - "35: Starting histology train/inference classification tests for large images to check exception handling" - ) - # overwrite previous results - sanitize_outputDir() - output_dir_patches = os.path.join(outputDir, "histo_patches") - if os.path.isdir(output_dir_patches): - shutil.rmtree(output_dir_patches) - Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") - Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - - for sub in ["1", "2"]: - file_to_check = os.path.join( - inputDir, "2d_histo_segmentation", sub, "image_resize.tiff" - ) - if os.path.exists(file_to_check): - os.remove(file_to_check) - - parameters_patch = {} - # extracting minimal number of patches to ensure that the test does not take too long - parameters_patch["num_patches"] = 3 - parameters_patch["patch_size"] = [128, 128] - parameters_patch["value_map"] = 
{0: 0, 255: 255} - - file_config_temp = write_temp_config_path(parameters_patch) - - patch_extraction( - inputDir + "/train_2d_histo_classification.csv", - output_dir_patches_output, - file_config_temp, - ) - - # resize the image - input_df, _ = parseTrainingCSV( - inputDir + "/train_2d_histo_classification.csv", train=False - ) - files_to_delete = [] - - def resize_for_ci(filename, scale): - """ - Helper function to resize images in CI - - Args: - filename (str): Filename of the image to be resized - scale (float): Scale factor to resize the image - - Returns: - str: Filename of the resized image - """ - new_filename = filename.replace(".tiff", "_resize.tiff") - try: - img = cv2.imread(filename) - dims = img.shape - img_resize = cv2.resize(img, (dims[1] * scale, dims[0] * scale)) - cv2.imwrite(new_filename, img_resize) - except Exception as ex1: - # this is only used in CI - print("Trying vips:", ex1) - try: - os.system( - "vips resize " + filename + " " + new_filename + " " + str(scale) - ) - except Exception as ex2: - print("Resize could not be done:", ex2) - return new_filename - - for _, row in input_df.iterrows(): - # ensure opm mask size check is triggered - _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) - - for patch_size in [ - [128, 128], - "[100m,100m]", - "[100mx100m]", - "[100mX100m]", - "[100m*100m]", - ]: - _ = get_patch_size_in_microns(row["Channel_0"], patch_size) - - # try to break resizer - new_filename = resize_for_ci(row["Channel_0"], scale=10) - row["Channel_0"] = new_filename - files_to_delete.append(new_filename) - # we do not need the last subject - break - - resized_inference_data_list = os.path.join( - inputDir, "train_2d_histo_classification_resize.csv" - ) - # drop last subject - input_df.drop(index=input_df.index[-1], axis=0, inplace=True) - input_df.to_csv(resized_inference_data_list, index=False) - files_to_delete.append(resized_inference_data_list) - - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") - temp_df = pd.read_csv(file_for_Training) - temp_df.drop("Label", axis=1, inplace=True) - temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) - temp_df.to_csv(file_for_Training, index=False) - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "histo" - parameters["patch_size"] = parameters_patch["patch_size"][0] - file_config_temp = write_temp_config_path(parameters) - parameters = ConfigManager(file_config_temp, version_check_flag=False) - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "densenet121" - parameters["model"]["norm_type"] = "none" - parameters["data_preprocessing"]["rgba2rgb"] = "" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["nested_training"]["testing"] = 1 - parameters["nested_training"]["validation"] = -2 - parameters["model"]["print_summary"] = False - modelDir = os.path.join(outputDir, "modelDir") - if os.path.isdir(modelDir): - shutil.rmtree(modelDir) - Path(modelDir).mkdir(parents=True, exist_ok=True) - TrainingManager( - dataframe=training_data, - outputDir=modelDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - parameters["output_dir"] = modelDir # this is in inference mode - 
parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] - parameters["patch_size"] = [ - parameters_patch["patch_size"][0] * 10, - parameters_patch["patch_size"][1] * 10, - ] - parameters["nested_training"]["validation"] = 1 - inference_data, parameters["headers"] = parseTrainingCSV( - resized_inference_data_list, train=False - ) - for model_type in all_model_type: - parameters["model"]["type"] = model_type - InferenceManager( - dataframe=inference_data, - modelDir=modelDir, - parameters=parameters, - device=device, - ) - all_folders_in_modelDir = os.listdir(modelDir) - for folder in all_folders_in_modelDir: - output_subject_dir = os.path.join(modelDir, folder) - if os.path.isdir(output_subject_dir): - # check in the default outputDir that's created - this is based on a unique timestamp - if folder != "output_validation": - # if 'predictions.csv' are not found, give error - assert os.path.exists( - os.path.join( - output_subject_dir, - str(input_df["SubjectID"][0]), - "predictions.csv", - ) - ), "predictions.csv not found" - # ensure previous results are removed - sanitize_outputDir() - - for file in files_to_delete: - os.remove(file) - - sanitize_outputDir() - - print("passed") - - -def test_train_inference_classification_histology_2d(device): - print("36: Starting histology train/inference classification tests") - # overwrite previous results - sanitize_outputDir() - output_dir_patches = os.path.join(outputDir, "histo_patches") - if os.path.isdir(output_dir_patches): - shutil.rmtree(output_dir_patches) - Path(output_dir_patches).mkdir(parents=True, exist_ok=True) - output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") - - parameters_patch = {} - # extracting minimal number of patches to ensure that the test does not take too long - parameters_patch["patch_size"] = [128, 128] - - for num_patches in [-1, 3]: - parameters_patch["num_patches"] = num_patches - file_config_temp = write_temp_config_path(parameters_patch) - - if os.path.exists(output_dir_patches_output): - shutil.rmtree(output_dir_patches_output) - # this ensures that the output directory for num_patches=3 is preserved - Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - patch_extraction( - inputDir + "/train_2d_histo_classification.csv", - output_dir_patches_output, - file_config_temp, - ) - - file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") - temp_df = pd.read_csv(file_for_Training) - temp_df.drop("Label", axis=1, inplace=True) - temp_df["valuetopredict"] = np.random.randint(2, size=6) - temp_df.to_csv(file_for_Training, index=False) - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "histo" - parameters["patch_size"] = 128 - file_config_temp = write_temp_config_path(parameters) - parameters = ConfigManager(file_config_temp, version_check_flag=False) - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) - parameters["model"]["num_channels"] = 3 - parameters["model"]["architecture"] = "densenet121" - parameters["model"]["norm_type"] = "none" - parameters["data_preprocessing"]["rgba2rgb"] = "" - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - parameters["nested_training"]["testing"] = 1 - parameters["nested_training"]["validation"] = -2 - parameters["model"]["print_summary"] = False - modelDir = 
os.path.join(outputDir, "modelDir") - if os.path.isdir(modelDir): - shutil.rmtree(modelDir) - Path(modelDir).mkdir(parents=True, exist_ok=True) - TrainingManager( - dataframe=training_data, - outputDir=modelDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - parameters["output_dir"] = modelDir # this is in inference mode - inference_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_histo_classification.csv", train=False - ) - for model_type in all_model_type: - parameters["nested_training"]["testing"] = 1 - parameters["nested_training"]["validation"] = -2 - parameters["output_dir"] = modelDir # this is in inference mode - inference_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_histo_segmentation.csv", train=False - ) - parameters["model"]["type"] = model_type - InferenceManager( - dataframe=inference_data, - modelDir=modelDir, - parameters=parameters, - device=device, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_unet_layerchange_rad_2d(device): - # test case to up code coverage --> test decreasing allowed layers for unet - print("37: Starting 2D Rad segmentation tests for normtype") - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - for model in ["unet_multilayer", "lightunet_multilayer", "unetr"]: - parameters["model"]["architecture"] = model - parameters["patch_size"] = [4, 4, 1] - parameters["model"]["dimension"] = 2 - - # this assertion should fail - with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]]( - parameters=parameters - ) - - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["depth"] = 7 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - parameters["model"]["norm_type"] = "batch" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.isdir(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_unetr_rad_3d(device): - print("38: Testing UNETR for 3D segmentation") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["model"]["architecture"] = "unetr" - parameters["patch_size"] = [4, 4, 4] - parameters["model"]["dimension"] = 3 - parameters["model"]["depth"] = 2 - parameters["model"]["print_summary"] = False - - # this assertion should fail - with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - parameters["model"]["dimension"] = 3 - parameters["patch_size"] = [32, 32, 32] - - with pytest.raises(BaseException) as _: - 
parameters["model"]["inner_patch_size"] = 19 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - with pytest.raises(BaseException) as _: - parameters["model"]["inner_patch_size"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - for patch in [16, 8]: - parameters["model"]["inner_patch_size"] = patch - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len( - parameters["headers"]["channelHeaders"] - ) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - parameters["model"]["norm_type"] = "batch" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.isdir(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_unetr_rad_2d(device): - print("39: Testing UNETR for 2D segmentation") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["model"]["architecture"] = "unetr" - parameters["patch_size"] = [128, 128, 1] - parameters["model"]["dimension"] = 2 - - for patch in [16, 8]: - parameters["model"]["inner_patch_size"] = patch - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - parameters["model"]["norm_type"] = "batch" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.isdir(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_transunet_rad_2d(device): - print("40: Testing TransUNet for 2D segmentation") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - parameters["model"]["architecture"] = "transunet" - parameters["patch_size"] = [128, 128, 1] - parameters["model"]["dimension"] = 2 - parameters["model"]["print_summary"] = False - - with pytest.raises(BaseException) as _: - parameters["model"]["num_heads"] = 6 - parameters["model"]["embed_dim"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - with pytest.raises(BaseException) as _: - parameters["model"]["num_heads"] = 3 - parameters["model"]["embed_dim"] = 50 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - parameters["model"]["embed_dim"] = 64 - parameters["model"]["depth"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["num_heads"] = 8 - parameters["model"]["amp"] = True - 
parameters["model"]["num_channels"] = 3 - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - parameters["model"]["norm_type"] = "batch" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.isdir(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_transunet_rad_3d(device): - print("41: Testing TransUNet for 3D segmentation") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["model"]["architecture"] = "transunet" - parameters["patch_size"] = [4, 4, 4] - parameters["model"]["dimension"] = 3 - parameters["model"]["print_summary"] = False - - # this assertion should fail - with pytest.raises(BaseException) as _: - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - parameters["model"]["dimension"] = 3 - parameters["patch_size"] = [32, 32, 32] - - with pytest.raises(BaseException) as _: - parameters["model"]["depth"] = 1 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - with pytest.raises(BaseException) as _: - parameters["model"]["num_heads"] = 6 - parameters["model"]["embed_dim"] = 64 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - with pytest.raises(BaseException) as _: - parameters["model"]["num_heads"] = 3 - parameters["model"]["embed_dim"] = 50 - global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - - parameters["model"]["num_heads"] = 8 - parameters["model"]["embed_dim"] = 64 - parameters["model"]["depth"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - parameters["model"]["norm_type"] = "batch" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - if os.path.isdir(outputDir): - shutil.rmtree(outputDir) # overwrite previous results - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_train_gradient_clipping_classification_rad_2d(device): - print("42: Testing gradient clipping") - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_classification.yaml", version_check_flag=False - ) - parameters["modality"] = "rad" - parameters["track_memory_usage"] = True - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - # read and parse csv - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_classification.csv" - ) - parameters["model"]["num_channels"] = 3 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = 
populate_header_in_parameters(parameters, parameters["headers"]) - # ensure gradient clipping is getting tested - for clip_mode in ["norm", "value", "agc"]: - parameters["model"]["architecture"] = "imagenet_vgg11" - parameters["model"]["final_layer"] = "softmax" - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - parameters["clip_mode"] = clip_mode - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - sanitize_outputDir() - - print("passed") - - -def test_train_segmentation_unet_conversion_rad_3d(device): - print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["class_list"] = [0, 1] - parameters["model"]["final_layer"] = "softmax" - parameters["model"]["amp"] = True - parameters["in_memory"] = True - parameters["verbose"] = False - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: - for converter_type in ["acs", "soft", "conv3d"]: - parameters["model"]["converter_type"] = converter_type - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_generic_cli_function_configgenerator(): - print("44: Starting testing cli function for config generator") - base_config_path = os.path.join(baseConfigDir, "config_all_options.yaml") - generator_config_path = os.path.join( - baseConfigDir, "config_generator_sample_strategy.yaml" - ) - sanitize_outputDir() - config_generator(base_config_path, generator_config_path, outputDir) - all_files = os.listdir(outputDir) - assert len(all_files) == 72, "config generator did not generate all files" - - for file in all_files: - parameters = None - with suppress_stdout_stderr(): - parameters = ConfigManager( - os.path.join(outputDir, file), version_check_flag=False - ) - assert parameters, "config generator did not generate valid config files" - sanitize_outputDir() - - generator_config = yaml.safe_load(open(generator_config_path, "r")) - generator_config["second_level_dict_that_should_fail"] = { - "key_1": {"key_2": "value"} - } - - file_config_temp = write_temp_config_path(generator_config) - - # test for failure - with pytest.raises(Exception) as exc_info: - config_generator(base_config_path, file_config_temp, outputDir) - sanitize_outputDir() - - print("Exception raised:", exc_info.value) - - sanitize_outputDir() - - print("passed") - - -def test_generic_cli_function_recoverconfig(): - print("45: Testing cli 
function for recover_config") - # Train, then recover a config and see if it exists/is valid YAML - - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - # patch_size is custom for sdnet - parameters["patch_size"] = [224, 224, 1] - parameters["batch_size"] = 2 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["num_channels"] = 1 - parameters["model"]["architecture"] = "sdnet" - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - output_config_path = write_temp_config_path(None) - assert recover_config( - outputDir, output_config_path - ), "recover_config returned false" - assert os.path.exists(output_config_path), "Didn't create a config file" - - new_params = ConfigManager(output_config_path, version_check_flag=False) - assert new_params, "Created YAML could not be parsed by ConfigManager" - - sanitize_outputDir() - - print("passed") - - -def test_generic_deploy_docker(): - print("46: Testing deployment of a model to Docker") - # Train, then try deploying that model (requires an installed Docker engine) - - deploymentOutputDir = os.path.join(outputDir, "mlcube") - # read and parse csv - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_2d_rad_segmentation.csv" - ) - - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["num_channels"] = 3 - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters["data_preprocessing"]["resize_image"] = [224, 224] - parameters["memory_save_mode"] = True - - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - - custom_entrypoint = os.path.join( - gandlfRootDir, - "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", - ) - for entrypoint_script in [None, custom_entrypoint]: - result = run_deployment( - os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), - deploymentOutputDir, - "docker", - "model", - entrypoint_script=entrypoint_script, - configfile=testingDir + "/config_segmentation.yaml", - modeldir=outputDir, - requires_gpu=True, - ) - msg = "run_deployment returned false" - if entrypoint_script: - msg += " with custom entrypoint script" - assert result, msg - - sanitize_outputDir() - - print("passed") - - -def test_collision_subjectid_test_segmentation_rad_2d(device): - print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") - parameters = ConfigManager( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["num_epochs"] = 1 - 
parameters["nested_training"]["testing"] = 1 - parameters["model"]["dimension"] = 2 - parameters["model"]["class_list"] = [0, 255] - parameters["model"]["amp"] = True - parameters["model"]["print_summary"] = False - parameters["model"]["num_channels"] = 3 - parameters["metrics"] = ["dice"] - parameters["model"]["architecture"] = "unet" - outputDir = os.path.join(testingDir, "data_output") - - file_config_temp = write_temp_config_path(parameters) - - # test the case where outputDir is explicitly provided to InferenceManager - train_data_path = inputDir + "/train_2d_rad_segmentation.csv" - test_data_path = inputDir + "/test_2d_rad_segmentation.csv" - df = pd.read_csv(train_data_path) - temp_df = pd.read_csv(train_data_path) - # Concatenate the two dataframes - df = pd.concat([df, temp_df], ignore_index=True) - - df.to_csv(test_data_path, index=False) - _, testing_data, _ = parseTestingCSV(test_data_path, outputDir) - # Save testing data to a csv file - testing_data.to_csv(test_data_path, index=False) - - main_run( - train_data_path + "," + train_data_path + "," + test_data_path, - file_config_temp, - outputDir, - False, - device, - resume=False, - reset=True, - ) - - sanitize_outputDir() - - print("passed") - - -def test_generic_random_numbers_are_deterministic_on_cpu(): - print("48: Starting testing deterministic random numbers generation") - - set_determinism(seed=42) - a, b = np.random.rand(3, 3), np.random.rand(3, 3) - - set_determinism(seed=42) - c, d = np.random.rand(3, 3), np.random.rand(3, 3) - - # Check that the generated random numbers are the same with numpy - assert np.allclose(a, c) - assert np.allclose(b, d) - - e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] - - set_determinism(seed=42) - g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] - - # Check that the generated random numbers are the same with Python's built-in random module - assert e == g - assert f == h - - print("passed") - - -def test_generic_cli_function_metrics_cli_rad_nd(): - print("49: Starting metric calculation tests") - for dim in ["2d", "3d"]: - for problem_type in ["segmentation", "classification", "synthesis"]: - synthesis_detected = problem_type == "synthesis" - problem_type_wrap = problem_type - if synthesis_detected: - problem_type_wrap = "classification" - # read and parse csv - training_data, _ = parseTrainingCSV( - inputDir + f"/train_{dim}_rad_{problem_type_wrap}.csv" - ) - if problem_type_wrap == "segmentation": - labels_array = training_data["Label"] - elif synthesis_detected: - labels_array = training_data["Channel_0"] - else: - labels_array = training_data["ValueToPredict"] - training_data["target"] = labels_array - training_data["prediction"] = labels_array - if synthesis_detected: - # this optional - training_data["mask"] = training_data["Label"] - - # read and initialize parameters for specific data dimension - parameters = ConfigManager( - testingDir + f"/config_{problem_type_wrap}.yaml", - version_check_flag=False, - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["2D"] - parameters["model"]["dimension"] = 2 - if dim == "3d": - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - - parameters["verbose"] = False - if synthesis_detected: - parameters["problem_type"] = problem_type - - temp_infer_csv = os.path.join(outputDir, "temp_csv.csv") - training_data.to_csv(temp_infer_csv, index=False) - - output_file = os.path.join(outputDir, "output.yaml") - - temp_config = 
write_temp_config_path(parameters)
-
-            # run the metrics calculation
-            generate_metrics_dict(temp_infer_csv, temp_config, output_file)
-
-            assert os.path.isfile(output_file), "Metrics output file was not generated"
-
-            sanitize_outputDir()
-
-
-def test_generic_deploy_metrics_docker():
-    print("50: Testing deployment of a metrics generator to Docker")
-    # requires an installed Docker engine
-
-    deploymentOutputDir = os.path.join(outputDir, "mlcube")
-
-    result = run_deployment(
-        os.path.join(gandlfRootDir, "mlcube/model_mlcube/"),
-        deploymentOutputDir,
-        "docker",
-        "metrics",
-    )
-
-    assert result, "run_deployment returned false"
-    sanitize_outputDir()
-
-    print("passed")
-
-
-def test_generic_data_split():
-    print("51: Starting test for splitting and saving CSVs")
-    # read and initialize parameters for specific data dimension
-    parameters = ConfigManager(
-        testingDir + "/config_classification.yaml", version_check_flag=False
-    )
-    parameters["nested_training"] = {"testing": 5, "validation": 5, "stratified": True}
-    # read and parse csv
-    training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_classification.csv")
-    # duplicate the data to test stratified sampling
-    training_data_duplicate = training_data._append(training_data)
-    for _ in range(1):
-        training_data_duplicate = training_data_duplicate._append(
-            training_data_duplicate
-        )
-    training_data_duplicate.reset_index(drop=True, inplace=True)
-    # ensure subjects are not duplicated
-    training_data_duplicate["SubjectID"] = training_data_duplicate.index
-
-    sanitize_outputDir()
-
-    split_data_and_save_csvs(training_data_duplicate, outputDir, parameters)
-
-    files_in_outputDir = os.listdir(outputDir)
-    assert len(files_in_outputDir) == 15, "CSVs were not split correctly"
-
-    sanitize_outputDir()
-
-    print("passed")

From 06c9d804b082e850c96be12900c2c2b491cf0df2 Mon Sep 17 00:00:00 2001
From: "V.Malefioudakis"
Date: Wed, 3 Jul 2024 12:23:10 +0300
Subject: [PATCH 12/34] update test_full

---
 testing/test_full.py | 5823 +++++++++++++++++++++---------------------
 1 file changed, 2912 insertions(+), 2911 deletions(-)

diff --git a/testing/test_full.py b/testing/test_full.py
index 06150bfb9..c28db39fb 100644
--- a/testing/test_full.py
+++ b/testing/test_full.py
@@ -243,2926 +243,2927 @@ def write_temp_config_path(parameters_to_write):
 # these are helper functions to be used in other tests
-# def test_train_segmentation_rad_2d(device):
-#     print("03: Starting 2D Rad segmentation tests")
-#     # read and parse csv
-#     parameters = parseConfig(
-#         testingDir + "/config_segmentation.yaml", version_check_flag=False
-#     )
-#     training_data, parameters["headers"] = parseTrainingCSV(
-#         inputDir + "/train_2d_rad_segmentation.csv"
-#     )
-#     parameters["modality"] = "rad"
-#     parameters["patch_size"] = patch_size["2D"]
-#     parameters["model"]["dimension"] = 2
-#     parameters["model"]["class_list"] = [0, 255]
-#     parameters["model"]["amp"] = True
-#     parameters["model"]["num_channels"] = 3
-#     parameters["model"]["onnx_export"] = False
-#     parameters["model"]["print_summary"] = False
-#     parameters["data_preprocessing"]["resize_image"] = [224, 224]
-#     parameters = populate_header_in_parameters(parameters, parameters["headers"])
-#     # read and initialize parameters for specific data dimension
-#     for model in all_models_segmentation:
-#         if model == "imagenet_unet":
-#             # imagenet_unet encoder needs to be toned down for small patch size
-#             parameters["model"]["encoder_name"] = "mit_b0"
-#             parameters["model"]["encoder_depth"] = 3
parameters["model"]["decoder_channels"] = (64, 32, 16) -# parameters["model"]["final_layer"] = random.choice( -# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] -# ) -# parameters["model"]["converter_type"] = random.choice( -# ["acs", "soft", "conv3d"] -# ) - -# if model == "dynunet": -# # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py -# parameters["model"]["kernel_size"] = (3, 3, 3, 1) -# parameters["model"]["strides"] = (1, 1, 1, 1) -# parameters["model"]["deep_supervision"] = False - -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_sdnet_rad_2d(device): -# print("04: Starting 2D Rad segmentation tests") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# # patch_size is custom for sdnet -# parameters["patch_size"] = [224, 224, 1] -# parameters["batch_size"] = 2 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["num_channels"] = 1 -# parameters["model"]["architecture"] = "sdnet" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# sanitize_outputDir() - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_rad_3d(device): -# print("05: Starting 3D Rad segmentation tests") -# # read and parse csv -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["class_list"] = [0, 1] -# parameters["model"]["final_layer"] = "softmax" -# parameters["model"]["amp"] = True -# parameters["in_memory"] = True -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in all_models_segmentation: -# if model == "imagenet_unet": -# # imagenet_unet encoder needs to be toned down for small patch size -# parameters["model"]["encoder_name"] = "mit_b0" -# with pytest.raises(Exception) as exc_info: -# _ = global_models_dict[model](parameters) -# print("Exception raised:", exc_info.value) -# parameters["model"]["encoder_name"] = "resnet34" -# parameters["model"]["encoder_depth"] = 3 -# parameters["model"]["decoder_channels"] = (64, 32, 16) -# parameters["model"]["final_layer"] = 
random.choice( -# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] -# ) -# parameters["model"]["converter_type"] = random.choice( -# ["acs", "soft", "conv3d"] -# ) - -# if model == "dynunet": -# # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py -# parameters["model"]["kernel_size"] = (3, 3, 3, 1) -# parameters["model"]["strides"] = (1, 1, 1, 1) -# parameters["model"]["deep_supervision"] = False - -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_regression_rad_2d(device): -# print("06: Starting 2D Rad regression tests") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_regression.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["amp"] = False -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_regression.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] -# parameters["scaling_factor"] = 1 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in all_models_regression: -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_regression_rad_2d_imagenet(device): -# print("07: Starting 2D Rad regression tests for imagenet models") -# # read and initialize parameters for specific data dimension -# print("Starting 2D Rad regression tests for imagenet models") -# parameters = ConfigManager( -# testingDir + "/config_regression.yaml", version_check_flag=False -# ) -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["amp"] = False -# parameters["model"]["print_summary"] = False -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_regression.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] -# parameters["scaling_factor"] = 1 -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in all_models_classification: -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) 
- -# sanitize_outputDir() - -# print("passed") - - -# def test_train_regression_brainage_rad_2d(device): -# print("08: Starting brain age tests") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_regression.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["amp"] = False -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_regression.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] -# parameters["scaling_factor"] = 1 -# parameters["model"]["architecture"] = "brain_age" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# # parameters_temp = copy.deepcopy(parameters) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# # file_config_temp = write_temp_config_path(parameters_temp) -# model_path = os.path.join(outputDir, "brain_age_best.pth.tar") -# config_path = os.path.join(outputDir, "parameters.pkl") -# optimization_result = post_training_model_optimization(model_path, config_path) -# assert optimization_result == False, "Optimization should fail" - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_regression_rad_3d(device): -# print("09: Starting 3D Rad regression tests") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_regression.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_regression.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in all_models_regression: -# if "efficientnet" in model: -# parameters["patch_size"] = [16, 16, 16] -# else: -# parameters["patch_size"] = patch_size["3D"] - -# if model == "imagenet_unet": -# parameters["model"]["depth"] = 2 -# parameters["model"]["decoder_channels"] = [32, 16] -# parameters["model"]["encoder_weights"] = "None" -# parameters["model"]["converter_type"] = random.choice( -# ["acs", "soft", "conv3d"] -# ) -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_classification_rad_2d(device): -# print("10: Starting 2D Rad classification tests") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# 
testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["track_memory_usage"] = True -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in all_models_regression: -# if model == "imagenet_unet": -# parameters["model"]["depth"] = 2 -# parameters["model"]["decoder_channels"] = [32, 16] -# parameters["model"]["encoder_weights"] = "None" -# parameters["model"]["converter_type"] = random.choice( -# ["acs", "soft", "conv3d"] -# ) -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# # ensure sigmoid and softmax activations are tested for imagenet models -# for activation_type in ["sigmoid", "softmax"]: -# parameters["model"]["architecture"] = "imagenet_vgg11" -# parameters["model"]["final_layer"] = activation_type -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_classification_rad_3d(device): -# print("11: Starting 3D Rad classification tests") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# # loop through selected models and train for single epoch -# for model in all_models_regression: -# if "efficientnet" in model: -# parameters["patch_size"] = [16, 16, 16] -# else: -# parameters["patch_size"] = patch_size["3D"] -# if model == "imagenet_unet": -# parameters["model"]["encoder_name"] = "efficientnet-b0" -# parameters["model"]["depth"] = 1 -# parameters["model"]["decoder_channels"] = [64] -# parameters["model"]["final_layer"] = random.choice( -# ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] -# ) -# parameters["model"]["converter_type"] = random.choice( -# ["acs", "soft", "conv3d"] -# ) -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, 
-# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_resume_inference_classification_rad_3d(device): -# print("12: Starting 3D Rad classification tests for resume and reset") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# model = all_models_regression[0] -# parameters["model"]["architecture"] = model -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# ## testing resume with parameter updates -# parameters["num_epochs"] = 2 -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# parameters["model"]["save_at_every_epoch"] = True -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=True, -# reset=False, -# ) - -# ## testing resume without parameter updates -# parameters["num_epochs"] = 1 -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=False, -# ) - -# parameters["output_dir"] = outputDir # this is in inference mode -# InferenceManager( -# dataframe=training_data, -# modelDir=outputDir, -# parameters=parameters, -# device=device, -# ) -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_optimize_classification_rad_3d(device): -# print("13: Starting 3D Rad segmentation tests for optimization") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["model"]["architecture"] = all_models_regression[0] -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# # parameters_temp = copy.deepcopy(parameters) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# # file_config_temp = write_temp_config_path(parameters_temp) -# model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") -# config_path = os.path.join(outputDir, "parameters.pkl") -# optimization_result 
= post_training_model_optimization(model_path, config_path) -# assert optimization_result == True, "Optimization should pass" - -# ## testing inference -# for model_type in all_model_type: -# parameters["model"]["type"] = model_type -# parameters["output_dir"] = outputDir # this is in inference mode -# InferenceManager( -# dataframe=training_data, -# modelDir=outputDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_optimize_segmentation_rad_2d(device): -# print("14: Starting 2D Rad segmentation tests for optimization") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["patch_size"] = patch_size["2D"] -# parameters["modality"] = "rad" -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["save_output"] = True -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = ["dice"] -# parameters["model"]["architecture"] = "resunet" -# parameters["model"]["onnx_export"] = True -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# ## testing inference -# for model_type in all_model_type: -# parameters["model"]["type"] = model_type -# parameters["output_dir"] = outputDir # this is in inference mode -# InferenceManager( -# dataframe=training_data, -# modelDir=outputDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_classification_with_logits_single_fold_rad_3d(device): -# print("15: Starting 3D Rad classification tests for single fold logits inference") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["final_layer"] = "logits" -# # loop through selected models and train for single epoch -# model = all_models_regression[0] -# parameters["model"]["architecture"] = model -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# ## add stratified splitting -# parameters["nested_training"]["stratified"] = True - -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # duplicate the data to test stratified sampling -# training_data_duplicate = training_data._append(training_data) -# for _ in range(1): -# training_data_duplicate = training_data_duplicate._append( -# training_data_duplicate -# ) -# training_data_duplicate.reset_index(drop=True, inplace=True) -# # ensure subjects are not duplicated -# training_data_duplicate["SubjectID"] = training_data_duplicate.index - -# # ensure every part of the code is tested -# for folds in [2, 1, -5]: -# ## add 
stratified folding information -# parameters["nested_training"]["testing"] = folds -# parameters["nested_training"]["validation"] = folds if folds != 1 else -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data_duplicate, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# ## this is to test if inference can run without having ground truth column -# training_data.drop("ValueToPredict", axis=1, inplace=True) -# training_data.drop("Label", axis=1, inplace=True) -# temp_infer_csv = os.path.join(outputDir, "temp_infer_csv.csv") -# training_data.to_csv(temp_infer_csv, index=False) -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV(temp_infer_csv) -# parameters["output_dir"] = outputDir # this is in inference mode -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["final_layer"] = "logits" -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# model = all_models_regression[0] -# parameters["model"]["architecture"] = model -# parameters["model"]["onnx_export"] = False -# InferenceManager( -# dataframe=training_data, -# modelDir=outputDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): -# print("16: Starting 3D Rad classification tests for multi-fold logits inference") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["final_layer"] = "logits" -# # necessary for n-fold cross-validation inference -# parameters["nested_training"]["validation"] = 2 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# model = all_models_regression[0] -# parameters["model"]["architecture"] = model -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# parameters["output_dir"] = outputDir # this is in inference mode -# InferenceManager( -# dataframe=training_data, -# modelDir=outputDir + "," + outputDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_scheduler_classification_rad_2d(device): -# print("17: Starting 2D Rad segmentation tests for scheduler") -# # read and initialize parameters for specific data dimension -# # loop through selected models and train for single epoch -# for scheduler in global_schedulers_dict: -# parameters = ConfigManager( -# 
testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "densenet121" -# parameters["model"]["norm_type"] = "instance" -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters["scheduler"] = {} -# parameters["scheduler"]["type"] = scheduler -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# ## ensure parameters are parsed every single time -# file_config_temp = write_temp_config_path(parameters) - -# parameters = ConfigManager(file_config_temp, version_check_flag=False) -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_optimizer_classification_rad_2d(device): -# print("18: Starting 2D Rad classification tests for optimizer") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "densenet121" -# parameters["model"]["norm_type"] = "none" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for optimizer in global_optimizer_dict: -# parameters["optimizer"] = {} -# parameters["optimizer"]["type"] = optimizer -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.exists(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# Path(outputDir).mkdir(parents=True, exist_ok=True) -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_clip_train_classification_rad_3d(device): -# print("19: Starting 3D Rad classification tests for clipping") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters["model"]["architecture"] = "vgg16" -# parameters["model"]["norm_type"] = "None" -# parameters["model"]["onnx_export"] = False -# 
parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for clip_mode in all_clip_modes: -# parameters["clip_mode"] = clip_mode -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# sanitize_outputDir() - -# print("passed") - - -# def test_train_normtype_segmentation_rad_3d(device): -# print("20: Starting 3D Rad segmentation tests for normtype") -# # read and initialize parameters for specific data dimension -# # read and parse csv -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["class_list"] = [0, 1] -# parameters["model"]["amp"] = True -# parameters["save_output"] = True -# parameters["data_postprocessing"] = {"fill_holes"} -# parameters["in_memory"] = True -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) - -# # these should raise exceptions -# for norm_type in ["none", None]: -# parameters["model"]["norm_type"] = norm_type -# file_config_temp = write_temp_config_path(parameters) -# with pytest.raises(Exception) as exc_info: -# parameters = ConfigManager(file_config_temp, version_check_flag=False) - -# print("Exception raised:", exc_info.value) - -# # loop through selected models and train for single epoch -# for norm in all_norm_types: -# for model in ["resunet", "unet", "fcn", "unetr"]: -# parameters["model"]["architecture"] = model -# parameters["model"]["norm_type"] = norm -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# Path(outputDir).mkdir(parents=True, exist_ok=True) -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_metrics_segmentation_rad_2d(device): -# print("21: Starting 2D Rad segmentation tests for metrics") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["data_postprocessing"] = {"mapping": {0: 0, 255: 1}} -# parameters["model"]["amp"] = True -# parameters["save_output"] = True -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = [ -# "dice", -# "hausdorff", -# "hausdorff95", -# "normalized_surface_dice", -# "sensitivity", -# "sensitivity_per_label", -# "specificity_segmentation", -# "specificity_segmentation_per_label", -# "jaccard", -# "jaccard_per_label", -# ] -# 
parameters["model"]["architecture"] = "resunet" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# file_config_temp = write_temp_config_path(parameters) - -# parameters = ConfigManager(file_config_temp, version_check_flag=False) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_metrics_regression_rad_2d(device): -# print("22: Starting 2D Rad regression tests for metrics") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_regression.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_regression.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["norm_type"] = "instance" -# parameters["model"]["amp"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "vgg11" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = True -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_losses_segmentation_rad_2d(device): -# print("23: Starting 2D Rad segmentation tests for losses") - -# # healper function to read and parse yaml and return parameters -# def get_parameters_after_alteration(loss_type: str) -> dict: -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# parameters["loss_function"] = loss_type -# file_config_temp = write_temp_config_path(parameters) -# # read and parse csv -# parameters = ConfigManager(file_config_temp, version_check_flag=True) -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# # disabling amp because some losses do not support Half, yet -# parameters["model"]["amp"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "resunet" -# parameters["metrics"] = ["dice"] -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# return parameters, training_data - -# # loop through selected models and train for single epoch -# for loss_type in [ -# "dc", -# "dc_log", -# "dcce", -# "dcce_logits", -# "tversky", -# "focal", -# "dc_focal", -# "mcc", -# "mcc_log", -# ]: -# parameters, training_data = get_parameters_after_alteration(loss_type) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# 
parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_config_read(): -# print("24: Starting testing reading configuration") -# parameters = ConfigManager( -# os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False -# ) -# parameters["data_preprocessing"]["resize_image"] = [128, 128] - -# file_config_temp = write_temp_config_path(parameters) - -# # read and parse csv -# parameters = ConfigManager(file_config_temp, version_check_flag=True) - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# assert parameters is not None, "parameters is None" -# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") -# assert data_loader is not None, "data_loader is None" - -# os.remove(file_config_temp) - -# # ensure resize_image is triggered -# parameters["data_preprocessing"].pop("resample") -# parameters["data_preprocessing"].pop("resample_min") -# parameters["data_preprocessing"]["resize_image"] = [128, 128] -# parameters["model"]["print_summary"] = False - -# with open(file_config_temp, "w") as file: -# yaml.dump(parameters, file) - -# parameters = ConfigManager(file_config_temp, version_check_flag=True) - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# assert parameters is not None, "parameters is None" -# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") -# assert data_loader is not None, "data_loader is None" - -# os.remove(file_config_temp) - -# # ensure resize_patch is triggered -# parameters["data_preprocessing"].pop("resize_image") -# parameters["data_preprocessing"]["resize_patch"] = [64, 64] - -# with open(file_config_temp, "w") as file: -# yaml.dump(parameters, file) - -# parameters = ConfigManager(file_config_temp, version_check_flag=True) - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# assert parameters is not None, "parameters is None" -# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") -# assert data_loader is not None, "data_loader is None" - -# os.remove(file_config_temp) - -# # ensure resize_image is triggered -# parameters["data_preprocessing"].pop("resize_patch") -# parameters["data_preprocessing"]["resize"] = [64, 64] - -# with open(file_config_temp, "w") as file: -# yaml.dump(parameters, file) - -# parameters = ConfigManager(file_config_temp, version_check_flag=True) - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# assert parameters is not None, "parameters is None" -# data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") -# assert data_loader is not None, "data_loader is None" - -# os.remove(file_config_temp) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_cli_function_preprocess(): -# print("25: Starting testing cli function preprocess") -# file_config = os.path.join(testingDir, "config_segmentation.yaml") -# sanitize_outputDir() -# file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") - -# input_data_df, _ = parseTrainingCSV(file_data, train=False) -# # add random metadata to ensure it gets preserved -# input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] -# input_data_df["metadata_test_float"] = 
np.random.rand(input_data_df.shape[0]) -# input_data_df["metadata_test_int"] = np.random.randint( -# 0, 100, input_data_df.shape[0] -# ) -# temp_csv = os.path.join(outputDir, "temp.csv") -# input_data_df.to_csv(temp_csv) - -# parameters = ConfigManager(file_config) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = "[0, 255||125]" -# # disabling amp because some losses do not support Half, yet -# parameters["model"]["amp"] = False -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "unet" -# parameters["metrics"] = ["dice"] -# parameters["patch_sampler"] = { -# "type": "label", -# "enable_padding": True, -# "biased_sampling": True, -# } -# parameters["weighted_loss"] = True -# parameters["save_output"] = True -# parameters["data_preprocessing"]["to_canonical"] = None -# parameters["data_preprocessing"]["rgba_to_rgb"] = None - -# file_config_temp = write_temp_config_path(parameters) - -# preprocess_and_save(temp_csv, file_config_temp, outputDir) -# training_data, parameters["headers"] = parseTrainingCSV( -# outputDir + "/data_processed.csv" -# ) - -# # check that the length of training data is what we expect -# assert ( -# len(training_data) == input_data_df.shape[0] -# ), "Number of subjects in dataframe is not same as that of input dataframe" -# assert ( -# len(training_data.columns) == len(input_data_df.columns) + 1 -# ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column -# sanitize_outputDir() - -# ## regression/classification preprocess -# file_config = os.path.join(testingDir, "config_regression.yaml") -# parameters = ConfigManager(file_config) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["amp"] = False -# # read and parse csv -# parameters["model"]["num_channels"] = 3 -# parameters["scaling_factor"] = 1 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters["data_preprocessing"]["to_canonical"] = None -# parameters["data_preprocessing"]["rgba_to_rgb"] = None -# file_data = os.path.join(inputDir, "train_2d_rad_regression.csv") -# input_data_df, _ = parseTrainingCSV(file_data, train=False) -# # add random metadata to ensure it gets preserved -# input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] -# input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) -# input_data_df["metadata_test_int"] = np.random.randint( -# 0, 100, input_data_df.shape[0] -# ) -# input_data_df.to_csv(temp_csv) - -# # store this separately for preprocess testing -# with open(file_config_temp, "w") as outfile: -# yaml.dump(parameters, outfile, default_flow_style=False) - -# preprocess_and_save(temp_csv, file_config_temp, outputDir) -# training_data, parameters["headers"] = parseTrainingCSV( -# outputDir + "/data_processed.csv" -# ) - -# # check that the length of training data is what we expect -# assert ( -# len(training_data) == input_data_df.shape[0] -# ), "Number of subjects in dataframe is not same as that of input dataframe" -# assert ( -# len(training_data.columns) == len(input_data_df.columns) + 1 -# ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column -# sanitize_outputDir() - -# print("passed") 
- - -# def test_generic_cli_function_mainrun(device): -# print("26: Starting testing cli function main_run") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) - -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["num_epochs"] = 1 -# parameters["nested_training"]["testing"] = 1 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = ["dice"] -# parameters["model"]["architecture"] = "unet" - -# file_config_temp = write_temp_config_path(parameters) - -# file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") - -# main_run( -# file_data, file_config_temp, outputDir, True, device, resume=False, reset=True -# ) -# sanitize_outputDir() - -# with open(file_config_temp, "w") as file: -# yaml.dump(parameters, file) - -# # testing train/valid split -# main_run( -# file_data + "," + file_data, -# file_config_temp, -# outputDir, -# True, -# device, -# resume=False, -# reset=True, -# ) - -# with open(file_config_temp, "w") as file: -# yaml.dump(parameters, file) - -# # testing train/valid/test split with resume -# main_run( -# file_data + "," + file_data + "," + file_data, -# file_config_temp, -# outputDir, -# True, -# device, -# resume=True, -# reset=False, -# ) -# sanitize_outputDir() - -# print("passed") - - -# def test_dataloader_construction_train_segmentation_3d(device): -# print("27: Starting 3D Rad segmentation tests") -# # read and parse csv -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# params_all_preprocessing_and_augs = ConfigManager( -# os.path.join(baseConfigDir, "config_all_options.yaml") -# ) - -# # take preprocessing and augmentations from all options -# for key in ["data_preprocessing", "data_augmentation"]: -# parameters[key] = params_all_preprocessing_and_augs[key] - -# # customize parameters to maximize test coverage -# parameters["data_preprocessing"].pop("normalize", None) -# parameters["data_preprocessing"]["normalize_nonZero"] = None -# parameters["data_preprocessing"]["default_probability"] = 1 -# parameters.pop("nested_training", None) -# parameters["nested_training"] = {} -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -5 - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["save_training"] = True -# parameters["save_output"] = True -# parameters["model"]["dimension"] = 3 -# parameters["model"]["class_list"] = [0, 1] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters["model"]["architecture"] = "unet" -# parameters["weighted_loss"] = False -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} -# parameters["data_postprocessing"]["fill_holes"] = True -# parameters["data_postprocessing"]["cca"] = True -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# sanitize_outputDir() -# 
TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_preprocess_functions(): -# print("28: Starting testing preprocessing functions") -# # initialize an input which has values between [-1,1] -# # checking tensor with last dimension of size 1 -# input_tensor = torch.rand(4, 256, 256, 1) -# input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) -# assert input_transformed.shape[0] == 3, "Number of channels is not 3" -# assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" - -# input_tensor = torch.rand(3, 256, 256, 1) -# input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) -# assert input_transformed.shape[0] == 4, "Number of channels is not 4" -# assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" - -# input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 -# input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) -# input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 -# input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) -# input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) -# input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) -# parameters_dict = {} -# parameters_dict["min"] = 0.25 -# parameters_dict["max"] = 0.75 -# input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( -# input_tensor -# ) -# assert ( -# torch.count_nonzero( -# input_transformed[input_transformed < parameters_dict["min"]] -# > parameters_dict["max"] -# ) -# == 0 -# ), "Input should be thresholded" - -# input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) -# assert ( -# torch.count_nonzero( -# input_transformed[input_transformed < parameters_dict["min"]] -# > parameters_dict["max"] -# ) -# == 0 -# ), "Input should be clipped" - -# non_zero_normalizer = global_preprocessing_dict["normalize_nonZero_masked"] -# input_transformed = non_zero_normalizer(input_tensor) -# non_zero_normalizer = global_preprocessing_dict["normalize_positive"] -# input_transformed = non_zero_normalizer(input_tensor) -# non_zero_normalizer = global_preprocessing_dict["normalize_nonZero"] -# input_transformed = non_zero_normalizer(input_tensor) - -# ## stain_normalization checks -# input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 -# training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") -# parameters_temp = {} -# parameters_temp["data_preprocessing"] = {} -# parameters_temp["data_preprocessing"]["stain_normalizer"] = { -# "target": training_data["Channel_0"][0] -# } -# for extractor in ["ruifrok", "macenko", "vahadane"]: -# parameters_temp["data_preprocessing"]["stain_normalizer"][ -# "extractor" -# ] = extractor -# non_zero_normalizer = global_preprocessing_dict["stain_normalizer"]( -# parameters_temp["data_preprocessing"]["stain_normalizer"] -# ) -# input_transformed = non_zero_normalizer(input_tensor) - -# ## histogram matching tests -# # histogram equalization -# input_tensor = torch.rand(1, 64, 64, 64) -# parameters_temp = {} -# parameters_temp["data_preprocessing"] = {} -# parameters_temp["data_preprocessing"]["histogram_matching"] = {} -# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( -# parameters_temp["data_preprocessing"]["histogram_matching"] -# ) -# 
input_transformed = non_zero_normalizer(input_tensor) -# # adaptive histogram equalization -# parameters_temp = {} -# parameters_temp["data_preprocessing"] = {} -# parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} -# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( -# parameters_temp["data_preprocessing"]["histogram_matching"] -# ) -# input_transformed = non_zero_normalizer(input_tensor) -# # histogram matching -# training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") -# parameters_temp = {} -# parameters_temp["data_preprocessing"] = {} -# parameters_temp["data_preprocessing"]["histogram_matching"] = { -# "target": training_data["Channel_0"][0] -# } -# non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( -# parameters_temp["data_preprocessing"]["histogram_matching"] -# ) -# input_transformed = non_zero_normalizer(input_tensor) - -# # fill holes -# input_tensor = torch.rand(1, 256, 256, 256) > 0.5 -# input_transformed = fill_holes(input_tensor) - -# ## CCA tests -# # 3d -# input_tensor = torch.rand(1, 256, 256, 256) > 0.5 -# input_transformed = cca(input_tensor) -# # 2d -# input_tensor = torch.rand(1, 256, 256) > 0.5 -# input_transformed = cca(input_tensor) -# # 2d rgb -# input_tensor = torch.rand(1, 3, 256, 256) > 0.5 -# input_transformed = cca(input_tensor) - -# input_tensor = torch.rand(1, 256, 256, 256) -# cropper = global_preprocessing_dict["crop_external_zero_planes"]( -# patch_size=[128, 128, 128] -# ) -# input_transformed = cropper(input_tensor) - -# cropper = global_preprocessing_dict["crop"]([64, 64, 64]) -# input_transformed = cropper(input_tensor) -# assert input_transformed.shape == (1, 128, 128, 128), "Cropping should work" - -# cropper = global_preprocessing_dict["centercrop"]([128, 128, 128]) -# input_transformed = cropper(input_tensor) -# assert input_transformed.shape == (1, 128, 128, 128), "Center-crop should work" - -# # test pure morphological operations -# input_tensor_3d = torch.rand(1, 1, 256, 256, 256) -# input_tensor_2d = torch.rand(1, 3, 256, 256) -# for mode in ["dilation", "erosion", "opening", "closing"]: -# input_transformed_3d = torch_morphological(input_tensor_3d, mode=mode) -# assert len(input_transformed_3d.shape) == 5, "Output should be 5D" -# input_transformed_2d = torch_morphological(input_tensor_2d, mode=mode) -# assert len(input_transformed_2d.shape) == 4, "Output should be 4D" - -# # test for failure -# with pytest.raises(Exception) as exc_info: -# input_tensor_4d = torch.rand(1, 1, 32, 32, 32, 32) -# input_transformed_3d = torch_morphological(input_tensor_4d) - -# print("Exception raised:", exc_info.value) - -# # test obtaining arrays -# input_tensor_3d = torch.rand(256, 256, 256) -# input_array = get_array_from_image_or_tensor(input_tensor_3d) -# assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" -# input_image = sitk.GetImageFromArray(input_array) -# input_array = get_array_from_image_or_tensor(input_image) -# assert isinstance(input_array, np.ndarray), "Array should be obtained from image" -# input_array = get_array_from_image_or_tensor(input_array) -# assert isinstance(input_array, np.ndarray), "Array should be obtained from array" - -# with pytest.raises(Exception) as exc_info: -# input_list = [0, 1] -# input_array = get_array_from_image_or_tensor(input_list) -# exception_raised = exc_info.value -# print("Exception raised: ", exception_raised) - -# ## image rescaling test -# input_tensor = torch.randint(0, 256, 
(1, 64, 64, 64)) -# # try out different options -# for params in [ -# {}, -# None, -# {"in_min_max": [5, 250], "out_min_max": [-1, 2]}, -# {"out_min_max": [0, 1], "percentiles": [5, 95]}, -# ]: -# rescaler = global_preprocessing_dict["rescale"](params) -# input_transformed = rescaler(input_tensor) -# assert ( -# input_transformed.min() >= rescaler.out_min_max[0] -# ), "Rescaling should work for min" -# assert ( -# input_transformed.max() <= rescaler.out_min_max[1] -# ), "Rescaling should work for max" - -# # tests for histology alpha check -# input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) -# _ = get_nonzero_percent(input_tensor) -# assert not ( -# alpha_rgb_2d_channel_check(input_tensor) -# ), "Alpha channel check should work for 4D tensors" -# input_tensor = torch.randint(0, 256, (64, 64, 64)) -# assert not ( -# alpha_rgb_2d_channel_check(input_tensor) -# ), "Alpha channel check should work for 3D images" -# input_tensor = torch.randint(0, 256, (64, 64, 4)) -# assert not ( -# alpha_rgb_2d_channel_check(input_tensor) -# ), "Alpha channel check should work for generic 4D images" -# input_tensor = torch.randint(0, 256, (64, 64)) -# assert alpha_rgb_2d_channel_check( -# input_tensor -# ), "Alpha channel check should work for grayscale 2D images" -# input_tensor = torch.randint(0, 256, (64, 64, 3)) -# assert alpha_rgb_2d_channel_check( -# input_tensor -# ), "Alpha channel check should work for RGB images" -# input_tensor = torch.randint(0, 256, (64, 64, 4)) -# input_tensor[:, :, 3] = 255 -# assert alpha_rgb_2d_channel_check( -# input_tensor -# ), "Alpha channel check should work for RGBA images" -# input_array = torch.randint(0, 256, (64, 64, 3)).numpy() -# temp_filename = os.path.join(outputDir, "temp.png") -# cv2.imwrite(temp_filename, input_array) -# temp_filename_tiff = convert_to_tiff(temp_filename, outputDir) -# assert os.path.exists(temp_filename_tiff), "Tiff file should be created" - -# # resize tests -# input_tensor = np.random.randint(0, 255, size=(20, 20, 20)) -# input_image = sitk.GetImageFromArray(input_tensor) -# expected_output = (10, 10, 10) -# input_transformed = resize_image(input_image, expected_output) -# assert input_transformed.GetSize() == expected_output, "Resize should work" -# input_tensor = np.random.randint(0, 255, size=(20, 20)) -# input_image = sitk.GetImageFromArray(input_tensor) -# expected_output = [10, 10] -# output_size_dict = {"resize": expected_output} -# input_transformed = resize_image(input_image, output_size_dict) -# assert list(input_transformed.GetSize()) == expected_output, "Resize should work" - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_augmentation_functions(): -# print("29: Starting testing augmentation functions") -# params_all_preprocessing_and_augs = ConfigManager( -# os.path.join(baseConfigDir, "config_all_options.yaml") -# ) - -# # this is for rgb augmentation -# input_tensor = torch.rand(3, 128, 128, 1) -# temp = global_augs_dict["colorjitter"]( -# params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] -# ) -# output_tensor = None -# output_tensor = temp(input_tensor) -# assert output_tensor != None, "RGB Augmentation should work" - -# # ensuring all code paths are covered -# for key in ["brightness", "contrast", "saturation", "hue"]: -# params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"][ -# key -# ] = 0.25 -# temp = global_augs_dict["colorjitter"]( -# params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] -# ) -# output_tensor = None -# output_tensor = 
temp(input_tensor) -# assert output_tensor != None, "RGB Augmentation should work" - -# # testing HED transforms with different options -# input_tensor = torch.rand(3, 128, 128, 1) -# params = { -# "data_augmentation": { -# "hed_transform": {}, -# # "hed_transform_light": {}, -# # "hed_transform_heavy": {}, -# } -# } -# temp = global_augs_dict["hed_transform"]( -# params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] -# ) -# ranges = [ -# "haematoxylin_bias_range", -# "eosin_bias_range", -# "dab_bias_range", -# "haematoxylin_sigma_range", -# "eosin_sigma_range", -# "dab_sigma_range", -# ] - -# default_range = [-0.1, 0.1] -# for key in ranges: -# params["data_augmentation"]["hed_transform"].setdefault(key, default_range) - -# params["data_augmentation"]["hed_transform"].setdefault( -# "cutoff_range", [0.05, 0.95] -# ) - -# # Check if the params are correctly set for each augmentation type -# assert params["data_augmentation"]["hed_transform"] == { -# "haematoxylin_bias_range": [-0.1, 0.1], -# "eosin_bias_range": [-0.1, 0.1], -# "dab_bias_range": [-0.1, 0.1], -# "haematoxylin_sigma_range": [-0.1, 0.1], -# "eosin_sigma_range": [-0.1, 0.1], -# "dab_sigma_range": [-0.1, 0.1], -# "cutoff_range": [0.05, 0.95], -# } -# temp = global_augs_dict["hed_transform"]( -# params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] -# ) -# output_tensor = None -# output_tensor = temp(input_tensor) -# assert output_tensor != None, "HED Augmentation should work" - -# # this is for all other augmentations -# input_tensor = torch.rand(3, 128, 128, 128) -# for aug in params_all_preprocessing_and_augs["data_augmentation"]: -# aug_lower = aug.lower() -# output_tensor = None -# if aug_lower in global_augs_dict: -# output_tensor = global_augs_dict[aug]( -# params_all_preprocessing_and_augs["data_augmentation"][aug_lower] -# )(input_tensor) -# assert output_tensor != None, "Augmentation should work" - -# # additional test for elastic -# params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] -# for key_to_pop in ["num_control_points", "max_displacement", "locked_borders"]: -# params_elastic.pop(key_to_pop, None) -# output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) -# assert output_tensor != None, "Augmentation for base elastic transform should work" - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_checkpointing_segmentation_rad_2d(device): -# print("30: Starting 2D Rad segmentation tests for metrics") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# parameters["patch_sampler"] = { -# "type": "label", -# "enable_padding": True, -# "biased_sampling": True, -# } -# file_config_temp = write_temp_config_path(parameters) -# parameters = ConfigManager(file_config_temp, version_check_flag=False) - -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["num_epochs"] = 1 -# parameters["nested_training"]["testing"] = 1 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = [ -# "dice", -# "dice_per_label", -# "hausdorff", -# "hausdorff95", -# "hd95_per_label", -# "hd100_per_label", -# "normalized_surface_dice", -# "normalized_surface_dice_per_label", -# 
"sensitivity", -# "sensitivity_per_label", -# "specificity_segmentation", -# "specificity_segmentation_per_label", -# "jaccard", -# "jaccard_per_label", -# ] -# parameters["model"]["architecture"] = "unet" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# parameters["num_epochs"] = 2 -# parameters["nested_training"]["validation"] = -2 -# parameters["nested_training"]["testing"] = 1 -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=False, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_model_patch_divisibility(): -# print("31: Starting patch divisibility tests") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# _, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["model"]["architecture"] = "unet" -# parameters["patch_size"] = [127, 127, 1] -# parameters["num_epochs"] = 1 -# parameters["nested_training"]["testing"] = 1 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = ["dice"] -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) - -# # this assertion should fail -# with pytest.raises(BaseException) as _: -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# parameters["model"]["architecture"] = "uinc" -# parameters["model"]["base_filters"] = 11 - -# # this assertion should fail -# with pytest.raises(BaseException) as _: -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_one_hot_logic(): -# print("32: Starting one hot logic tests") -# random_array = np.random.randint(5, size=(20, 20, 20)) -# img = sitk.GetImageFromArray(random_array) -# img_tensor = get_tensor_from_image(img).to(torch.float16) -# img_tensor = img_tensor.unsqueeze(0).unsqueeze(0) - -# class_list = [*range(0, np.max(random_array) + 1)] -# img_tensor_oh = one_hot(img_tensor, class_list) -# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) -# comparison = random_array == img_tensor_oh_rev_array -# assert comparison.all(), "Arrays are not equal" - -# class_list = ["0", "1||2||3", np.max(random_array)] -# img_tensor_oh = one_hot(img_tensor, class_list) -# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) - -# # check for background -# comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0) -# assert comparison.all(), "Arrays at '0' are not equal" - -# # check last foreground -# comparison = (random_array == np.max(random_array)) == ( -# img_tensor_oh_rev_array == len(class_list) - 1 -# ) -# assert comparison.all(), "Arrays at final foreground are not equal" - -# # check combined foreground -# combined_array = np.logical_or( -# np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3) -# ) -# comparison = combined_array == (img_tensor_oh_rev_array == 1) -# assert 
comparison.all(), "Arrays at the combined foreground are not equal" - -# parameters = {"data_postprocessing": {}} -# mapped_output = get_mapped_label( -# torch.from_numpy(img_tensor_oh_rev_array), parameters -# ) - -# parameters = {} -# mapped_output = get_mapped_label( -# torch.from_numpy(img_tensor_oh_rev_array), parameters -# ) - -# parameters = {"data_postprocessing": {"mapping": {0: 0, 1: 1, 2: 5}}} -# mapped_output = get_mapped_label( -# torch.from_numpy(img_tensor_oh_rev_array), parameters -# ) - -# for key, value in parameters["data_postprocessing"]["mapping"].items(): -# comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value) -# assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value) - -# # check the case where 0 is present as an int in a special case -# class_list = [0, "1||2||3", np.max(random_array)] -# img_tensor_oh = one_hot(img_tensor, class_list) -# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) - -# # check for background -# comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0) -# assert comparison.all(), "Arrays at '0' are not equal" - -# # check the case where 0 is absent from class_list -# class_list = ["1||2||3", np.max(random_array)] -# img_tensor_oh = one_hot(img_tensor, class_list) -# img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list) - -# # check last foreground -# comparison = (random_array == np.max(random_array)) == ( -# img_tensor_oh_rev_array == len(class_list) -# ) -# assert comparison.all(), "Arrays at final foreground are not equal" - -# # check combined foreground -# combined_array = np.logical_or( -# np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3) -# ) -# comparison = combined_array == (img_tensor_oh_rev_array == 1) -# assert comparison.all(), "Arrays at the combined foreground are not equal" - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_anonymizer(): -# print("33: Starting anomymizer tests") -# input_file = get_testdata_file("MR_small.dcm") - -# output_file = os.path.join(outputDir, "MR_small_anonymized.dcm") - -# config_file = os.path.join(baseConfigDir, "config_anonymizer.yaml") - -# run_anonymizer(input_file, output_file, config_file, "rad") -# assert os.path.exists(output_file), "Anonymized file does not exist" - -# # test defaults -# run_anonymizer(input_file, output_file, None, "rad") -# assert os.path.exists(output_file), "Anonymized file does not exist" - -# # test nifti conversion -# config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml") -# with open(config_file, "r") as file_data: -# yaml_data = file_data.read() -# parameters = yaml.safe_load(yaml_data) -# parameters["convert_to_nifti"] = True -# with open(config_file_for_nifti, "w") as file: -# yaml.dump(parameters, file) - -# # for nifti conversion, the input needs to be in a dir -# input_folder_for_nifti = os.path.join(outputDir, "nifti_input") -# Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True) -# shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm")) - -# output_file = os.path.join(outputDir, "MR_small.nii.gz") - -# run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad") -# assert os.path.exists(output_file), "Anonymized file does not exist" - -# if not os.path.exists(output_file): -# raise Exception("Output NIfTI file was not created") - -# input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff") -# output_file_histo = 
os.path.join(outputDir, "histo_anon.tiff") -# # this assertion should fail since histo anonymizer is not implemented -# with pytest.raises(BaseException) as exc_info: -# run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo") -# assert os.path.exists(output_file_histo), "Anonymized file does not exist" -# print("Exception raised: ", exc_info.value) -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_segmentation_histology_2d(device): -# print("34: Starting histology train/inference segmentation tests") -# # overwrite previous results -# sanitize_outputDir() -# output_dir_patches = os.path.join(outputDir, "histo_patches") -# if os.path.isdir(output_dir_patches): -# shutil.rmtree(output_dir_patches) -# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) -# output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") -# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - -# parameters_patch = {} -# # extracting minimal number of patches to ensure that the test does not take too long -# parameters_patch["num_patches"] = 10 -# parameters_patch["read_type"] = "sequential" -# # define patches to be extracted in terms of microns -# parameters_patch["patch_size"] = ["1000m", "1000m"] - -# file_config_temp = write_temp_config_path(parameters_patch) - -# patch_extraction( -# inputDir + "/train_2d_histo_segmentation.csv", -# output_dir_patches_output, -# file_config_temp, -# ) - -# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) -# parameters["patch_size"] = patch_size["2D"] -# parameters["modality"] = "histo" -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = 3 -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["model"]["architecture"] = "resunet" -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -2 -# parameters["metrics"] = ["dice"] -# parameters["model"]["onnx_export"] = True -# parameters["model"]["print_summary"] = True -# parameters["data_preprocessing"]["resize_image"] = [128, 128] -# modelDir = os.path.join(outputDir, "modelDir") -# Path(modelDir).mkdir(parents=True, exist_ok=True) -# TrainingManager( -# dataframe=training_data, -# outputDir=modelDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# inference_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_histo_segmentation.csv", train=False -# ) -# inference_data.drop(index=inference_data.index[-1], axis=0, inplace=True) -# InferenceManager( -# dataframe=inference_data, -# modelDir=modelDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_classification_histology_large_2d(device): -# print( -# "35: Starting histology train/inference classification tests for large images to check exception handling" -# ) -# # overwrite previous results -# sanitize_outputDir() -# output_dir_patches = os.path.join(outputDir, "histo_patches") -# if os.path.isdir(output_dir_patches): -# shutil.rmtree(output_dir_patches) -# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) -#
output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") -# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - -# for sub in ["1", "2"]: -# file_to_check = os.path.join( -# inputDir, "2d_histo_segmentation", sub, "image_resize.tiff" -# ) -# if os.path.exists(file_to_check): -# os.remove(file_to_check) - -# parameters_patch = {} -# # extracting minimal number of patches to ensure that the test does not take too long -# parameters_patch["num_patches"] = 3 -# parameters_patch["patch_size"] = [128, 128] -# parameters_patch["value_map"] = {0: 0, 255: 255} - -# file_config_temp = write_temp_config_path(parameters_patch) - -# patch_extraction( -# inputDir + "/train_2d_histo_classification.csv", -# output_dir_patches_output, -# file_config_temp, -# ) - -# # resize the image -# input_df, _ = parseTrainingCSV( -# inputDir + "/train_2d_histo_classification.csv", train=False -# ) -# files_to_delete = [] - -# def resize_for_ci(filename, scale): -# """ -# Helper function to resize images in CI - -# Args: -# filename (str): Filename of the image to be resized -# scale (float): Scale factor to resize the image - -# Returns: -# str: Filename of the resized image -# """ -# new_filename = filename.replace(".tiff", "_resize.tiff") -# try: -# img = cv2.imread(filename) -# dims = img.shape -# img_resize = cv2.resize(img, (dims[1] * scale, dims[0] * scale)) -# cv2.imwrite(new_filename, img_resize) -# except Exception as ex1: -# # this is only used in CI -# print("Trying vips:", ex1) -# try: -# os.system( -# "vips resize " + filename + " " + new_filename + " " + str(scale) -# ) -# except Exception as ex2: -# print("Resize could not be done:", ex2) -# return new_filename - -# for _, row in input_df.iterrows(): -# # ensure opm mask size check is triggered -# _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) - -# for patch_size in [ -# [128, 128], -# "[100m,100m]", -# "[100mx100m]", -# "[100mX100m]", -# "[100m*100m]", -# ]: -# _ = get_patch_size_in_microns(row["Channel_0"], patch_size) - -# # try to break resizer -# new_filename = resize_for_ci(row["Channel_0"], scale=10) -# row["Channel_0"] = new_filename -# files_to_delete.append(new_filename) -# # we do not need the last subject -# break - -# resized_inference_data_list = os.path.join( -# inputDir, "train_2d_histo_classification_resize.csv" -# ) -# # drop last subject -# input_df.drop(index=input_df.index[-1], axis=0, inplace=True) -# input_df.to_csv(resized_inference_data_list, index=False) -# files_to_delete.append(resized_inference_data_list) - -# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") -# temp_df = pd.read_csv(file_for_Training) -# temp_df.drop("Label", axis=1, inplace=True) -# temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) -# temp_df.to_csv(file_for_Training, index=False) -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "histo" -# parameters["patch_size"] = parameters_patch["patch_size"][0] -# file_config_temp = write_temp_config_path(parameters) -# parameters = ConfigManager(file_config_temp, version_check_flag=False) -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "densenet121" -# parameters["model"]["norm_type"] = "none" -# 
parameters["data_preprocessing"]["rgba2rgb"] = "" -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -2 -# parameters["model"]["print_summary"] = False -# modelDir = os.path.join(outputDir, "modelDir") -# if os.path.isdir(modelDir): -# shutil.rmtree(modelDir) -# Path(modelDir).mkdir(parents=True, exist_ok=True) -# TrainingManager( -# dataframe=training_data, -# outputDir=modelDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# parameters["output_dir"] = modelDir # this is in inference mode -# parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] -# parameters["patch_size"] = [ -# parameters_patch["patch_size"][0] * 10, -# parameters_patch["patch_size"][1] * 10, -# ] -# parameters["nested_training"]["validation"] = 1 -# inference_data, parameters["headers"] = parseTrainingCSV( -# resized_inference_data_list, train=False -# ) -# for model_type in all_model_type: -# parameters["model"]["type"] = model_type -# InferenceManager( -# dataframe=inference_data, -# modelDir=modelDir, -# parameters=parameters, -# device=device, -# ) -# all_folders_in_modelDir = os.listdir(modelDir) -# for folder in all_folders_in_modelDir: -# output_subject_dir = os.path.join(modelDir, folder) -# if os.path.isdir(output_subject_dir): -# # check in the default outputDir that's created - this is based on a unique timestamp -# if folder != "output_validation": -# # if 'predictions.csv' are not found, give error -# assert os.path.exists( -# os.path.join( -# output_subject_dir, -# str(input_df["SubjectID"][0]), -# "predictions.csv", -# ) -# ), "predictions.csv not found" -# # ensure previous results are removed -# sanitize_outputDir() - -# for file in files_to_delete: -# os.remove(file) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_inference_classification_histology_2d(device): -# print("36: Starting histology train/inference classification tests") -# # overwrite previous results -# sanitize_outputDir() -# output_dir_patches = os.path.join(outputDir, "histo_patches") -# if os.path.isdir(output_dir_patches): -# shutil.rmtree(output_dir_patches) -# Path(output_dir_patches).mkdir(parents=True, exist_ok=True) -# output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") - -# parameters_patch = {} -# # extracting minimal number of patches to ensure that the test does not take too long -# parameters_patch["patch_size"] = [128, 128] - -# for num_patches in [-1, 3]: -# parameters_patch["num_patches"] = num_patches -# file_config_temp = write_temp_config_path(parameters_patch) - -# if os.path.exists(output_dir_patches_output): -# shutil.rmtree(output_dir_patches_output) -# # this ensures that the output directory for num_patches=3 is preserved -# Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) -# patch_extraction( -# inputDir + "/train_2d_histo_classification.csv", -# output_dir_patches_output, -# file_config_temp, -# ) - -# file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") -# temp_df = pd.read_csv(file_for_Training) -# temp_df.drop("Label", axis=1, inplace=True) -# temp_df["valuetopredict"] = np.random.randint(2, size=6) -# temp_df.to_csv(file_for_Training, index=False) -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "histo" -# 
parameters["patch_size"] = 128 -# file_config_temp = write_temp_config_path(parameters) -# parameters = ConfigManager(file_config_temp, version_check_flag=False) -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["architecture"] = "densenet121" -# parameters["model"]["norm_type"] = "none" -# parameters["data_preprocessing"]["rgba2rgb"] = "" -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -2 -# parameters["model"]["print_summary"] = False -# modelDir = os.path.join(outputDir, "modelDir") -# if os.path.isdir(modelDir): -# shutil.rmtree(modelDir) -# Path(modelDir).mkdir(parents=True, exist_ok=True) -# TrainingManager( -# dataframe=training_data, -# outputDir=modelDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# parameters["output_dir"] = modelDir # this is in inference mode -# inference_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_histo_classification.csv", train=False -# ) -# for model_type in all_model_type: -# parameters["nested_training"]["testing"] = 1 -# parameters["nested_training"]["validation"] = -2 -# parameters["output_dir"] = modelDir # this is in inference mode -# inference_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_histo_segmentation.csv", train=False -# ) -# parameters["model"]["type"] = model_type -# InferenceManager( -# dataframe=inference_data, -# modelDir=modelDir, -# parameters=parameters, -# device=device, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_unet_layerchange_rad_2d(device): -# # test case to up code coverage --> test decreasing allowed layers for unet -# print("37: Starting 2D Rad segmentation tests for normtype") -# # read and parse csv -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# for model in ["unet_multilayer", "lightunet_multilayer", "unetr"]: -# parameters["model"]["architecture"] = model -# parameters["patch_size"] = [4, 4, 1] -# parameters["model"]["dimension"] = 2 - -# # this assertion should fail -# with pytest.raises(BaseException) as _: -# global_models_dict[parameters["model"]["architecture"]]( -# parameters=parameters -# ) - -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["depth"] = 7 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# parameters["model"]["norm_type"] = "batch" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def 
test_train_segmentation_unetr_rad_3d(device): -# print("38: Testing UNETR for 3D segmentation") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["model"]["architecture"] = "unetr" -# parameters["patch_size"] = [4, 4, 4] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["depth"] = 2 -# parameters["model"]["print_summary"] = False - -# # this assertion should fail -# with pytest.raises(BaseException) as _: -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# parameters["model"]["dimension"] = 3 -# parameters["patch_size"] = [32, 32, 32] - -# with pytest.raises(BaseException) as _: -# parameters["model"]["inner_patch_size"] = 19 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# with pytest.raises(BaseException) as _: -# parameters["model"]["inner_patch_size"] = 64 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# for patch in [16, 8]: -# parameters["model"]["inner_patch_size"] = patch -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = len( -# parameters["headers"]["channelHeaders"] -# ) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# parameters["model"]["norm_type"] = "batch" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_unetr_rad_2d(device): -# print("39: Testing UNETR for 2D segmentation") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["model"]["architecture"] = "unetr" -# parameters["patch_size"] = [128, 128, 1] -# parameters["model"]["dimension"] = 2 - -# for patch in [16, 8]: -# parameters["model"]["inner_patch_size"] = patch -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# parameters["model"]["norm_type"] = "batch" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_transunet_rad_2d(device): -# print("40: Testing TransUNet for 2D segmentation") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] 
= parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# parameters["model"]["architecture"] = "transunet" -# parameters["patch_size"] = [128, 128, 1] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["print_summary"] = False - -# with pytest.raises(BaseException) as _: -# parameters["model"]["num_heads"] = 6 -# parameters["model"]["embed_dim"] = 64 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# with pytest.raises(BaseException) as _: -# parameters["model"]["num_heads"] = 3 -# parameters["model"]["embed_dim"] = 50 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# parameters["model"]["embed_dim"] = 64 -# parameters["model"]["depth"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["num_heads"] = 8 -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = 3 -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# parameters["model"]["norm_type"] = "batch" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_transunet_rad_3d(device): -# print("41: Testing TransUNet for 3D segmentation") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["model"]["architecture"] = "transunet" -# parameters["patch_size"] = [4, 4, 4] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["print_summary"] = False - -# # this assertion should fail -# with pytest.raises(BaseException) as _: -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# parameters["model"]["dimension"] = 3 -# parameters["patch_size"] = [32, 32, 32] - -# with pytest.raises(BaseException) as _: -# parameters["model"]["depth"] = 1 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# with pytest.raises(BaseException) as _: -# parameters["model"]["num_heads"] = 6 -# parameters["model"]["embed_dim"] = 64 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# with pytest.raises(BaseException) as _: -# parameters["model"]["num_heads"] = 3 -# parameters["model"]["embed_dim"] = 50 -# global_models_dict[parameters["model"]["architecture"]](parameters=parameters) - -# parameters["model"]["num_heads"] = 8 -# parameters["model"]["embed_dim"] = 64 -# parameters["model"]["depth"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# parameters["model"]["norm_type"] = "batch" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# if os.path.isdir(outputDir): -# shutil.rmtree(outputDir) # overwrite previous results -# sanitize_outputDir() -# 
TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_train_gradient_clipping_classification_rad_2d(device): -# print("42: Testing gradient clipping") -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_classification.yaml", version_check_flag=False -# ) -# parameters["modality"] = "rad" -# parameters["track_memory_usage"] = True -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# # read and parse csv -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_classification.csv" -# ) -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # ensure gradient clipping is getting tested -# for clip_mode in ["norm", "value", "agc"]: -# parameters["model"]["architecture"] = "imagenet_vgg11" -# parameters["model"]["final_layer"] = "softmax" -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# parameters["clip_mode"] = clip_mode -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# sanitize_outputDir() - -# print("passed") - - -# def test_train_segmentation_unet_conversion_rad_3d(device): -# print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") -# # read and parse csv -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_3d_rad_segmentation.csv" -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 -# parameters["model"]["class_list"] = [0, 1] -# parameters["model"]["final_layer"] = "softmax" -# parameters["model"]["amp"] = True -# parameters["in_memory"] = True -# parameters["verbose"] = False -# parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# # loop through selected models and train for single epoch -# for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: -# for converter_type in ["acs", "soft", "conv3d"]: -# parameters["model"]["converter_type"] = converter_type -# parameters["model"]["architecture"] = model -# parameters["nested_training"]["testing"] = -5 -# parameters["nested_training"]["validation"] = -5 -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_cli_function_configgenerator(): -# print("44: Starting testing cli function for config generator") -# base_config_path = os.path.join(baseConfigDir, "config_all_options.yaml") -# generator_config_path = os.path.join( -# baseConfigDir, "config_generator_sample_strategy.yaml" -# ) -# sanitize_outputDir() -# 
config_generator(base_config_path, generator_config_path, outputDir) -# all_files = os.listdir(outputDir) -# assert len(all_files) == 72, "config generator did not generate all files" - -# for file in all_files: -# parameters = None -# with suppress_stdout_stderr(): -# parameters = ConfigManager( -# os.path.join(outputDir, file), version_check_flag=False -# ) -# assert parameters, "config generator did not generate valid config files" -# sanitize_outputDir() - -# generator_config = yaml.safe_load(open(generator_config_path, "r")) -# generator_config["second_level_dict_that_should_fail"] = { -# "key_1": {"key_2": "value"} -# } - -# file_config_temp = write_temp_config_path(generator_config) - -# # test for failure -# with pytest.raises(Exception) as exc_info: -# config_generator(base_config_path, file_config_temp, outputDir) -# sanitize_outputDir() - -# print("Exception raised:", exc_info.value) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_cli_function_recoverconfig(): -# print("45: Testing cli function for recover_config") -# # Train, then recover a config and see if it exists/is valid YAML - -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) -# # patch_size is custom for sdnet -# parameters["patch_size"] = [224, 224, 1] -# parameters["batch_size"] = 2 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["num_channels"] = 1 -# parameters["model"]["architecture"] = "sdnet" -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) -# output_config_path = write_temp_config_path(None) -# assert recover_config( -# outputDir, output_config_path -# ), "recover_config returned false" -# assert os.path.exists(output_config_path), "Didn't create a config file" - -# new_params = ConfigManager(output_config_path, version_check_flag=False) -# assert new_params, "Created YAML could not be parsed by ConfigManager" - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_deploy_docker(): -# print("46: Testing deployment of a model to Docker") -# # Train, then try deploying that model (requires an installed Docker engine) - -# deploymentOutputDir = os.path.join(outputDir, "mlcube") -# # read and parse csv -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) -# training_data, parameters["headers"] = parseTrainingCSV( -# inputDir + "/train_2d_rad_segmentation.csv" -# ) - -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["num_channels"] = 3 -# parameters["model"]["onnx_export"] = False -# parameters["model"]["print_summary"] = False -# parameters["data_preprocessing"]["resize_image"] = [224, 224] -# parameters["memory_save_mode"] = True - -# parameters = populate_header_in_parameters(parameters, parameters["headers"]) -# sanitize_outputDir() -# TrainingManager( -# dataframe=training_data, -# 
outputDir=outputDir, -# parameters=parameters, -# device=device, -# resume=False, -# reset=True, -# ) - -# custom_entrypoint = os.path.join( -# gandlfRootDir, -# "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", -# ) -# for entrypoint_script in [None, custom_entrypoint]: -# result = run_deployment( -# os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), -# deploymentOutputDir, -# "docker", -# "model", -# entrypoint_script=entrypoint_script, -# configfile=testingDir + "/config_segmentation.yaml", -# modeldir=outputDir, -# requires_gpu=True, -# ) -# msg = "run_deployment returned false" -# if entrypoint_script: -# msg += " with custom entrypoint script" -# assert result, msg - -# sanitize_outputDir() - -# print("passed") - - -# def test_collision_subjectid_test_segmentation_rad_2d(device): -# print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") -# parameters = ConfigManager( -# testingDir + "/config_segmentation.yaml", version_check_flag=False -# ) - -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["num_epochs"] = 1 -# parameters["nested_training"]["testing"] = 1 -# parameters["model"]["dimension"] = 2 -# parameters["model"]["class_list"] = [0, 255] -# parameters["model"]["amp"] = True -# parameters["model"]["print_summary"] = False -# parameters["model"]["num_channels"] = 3 -# parameters["metrics"] = ["dice"] -# parameters["model"]["architecture"] = "unet" -# outputDir = os.path.join(testingDir, "data_output") - -# file_config_temp = write_temp_config_path(parameters) - -# # test the case where outputDir is explicitly provided to InferenceManager -# train_data_path = inputDir + "/train_2d_rad_segmentation.csv" -# test_data_path = inputDir + "/test_2d_rad_segmentation.csv" -# df = pd.read_csv(train_data_path) -# temp_df = pd.read_csv(train_data_path) -# # Concatenate the two dataframes -# df = pd.concat([df, temp_df], ignore_index=True) - -# df.to_csv(test_data_path, index=False) -# _, testing_data, _ = parseTestingCSV(test_data_path, outputDir) -# # Save testing data to a csv file -# testing_data.to_csv(test_data_path, index=False) - -# main_run( -# train_data_path + "," + train_data_path + "," + test_data_path, -# file_config_temp, -# outputDir, -# False, -# device, -# resume=False, -# reset=True, -# ) - -# sanitize_outputDir() - -# print("passed") - - -# def test_generic_random_numbers_are_deterministic_on_cpu(): -# print("48: Starting testing deterministic random numbers generation") - -# set_determinism(seed=42) -# a, b = np.random.rand(3, 3), np.random.rand(3, 3) - -# set_determinism(seed=42) -# c, d = np.random.rand(3, 3), np.random.rand(3, 3) - -# # Check that the generated random numbers are the same with numpy -# assert np.allclose(a, c) -# assert np.allclose(b, d) - -# e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] - -# set_determinism(seed=42) -# g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] - -# # Check that the generated random numbers are the same with Python's built-in random module -# assert e == g -# assert f == h - -# print("passed") - - -# def test_generic_cli_function_metrics_cli_rad_nd(): -# print("49: Starting metric calculation tests") -# for dim in ["2d", "3d"]: -# for problem_type in ["segmentation", "classification", "synthesis"]: -# synthesis_detected = problem_type == "synthesis" -# problem_type_wrap = problem_type -# if synthesis_detected: -# problem_type_wrap = "classification" -# # read and 
parse csv -# training_data, _ = parseTrainingCSV( -# inputDir + f"/train_{dim}_rad_{problem_type_wrap}.csv" -# ) -# if problem_type_wrap == "segmentation": -# labels_array = training_data["Label"] -# elif synthesis_detected: -# labels_array = training_data["Channel_0"] -# else: -# labels_array = training_data["ValueToPredict"] -# training_data["target"] = labels_array -# training_data["prediction"] = labels_array -# if synthesis_detected: -# # this optional -# training_data["mask"] = training_data["Label"] - -# # read and initialize parameters for specific data dimension -# parameters = ConfigManager( -# testingDir + f"/config_{problem_type_wrap}.yaml", -# version_check_flag=False, -# ) -# parameters["modality"] = "rad" -# parameters["patch_size"] = patch_size["2D"] -# parameters["model"]["dimension"] = 2 -# if dim == "3d": -# parameters["patch_size"] = patch_size["3D"] -# parameters["model"]["dimension"] = 3 - -# parameters["verbose"] = False -# if synthesis_detected: -# parameters["problem_type"] = problem_type - -# temp_infer_csv = os.path.join(outputDir, "temp_csv.csv") -# training_data.to_csv(temp_infer_csv, index=False) - -# output_file = os.path.join(outputDir, "output.yaml") - -# temp_config = write_temp_config_path(parameters) - -# # run the metrics calculation -# generate_metrics_dict(temp_infer_csv, temp_config, output_file) - -# assert os.path.isfile(output_file), "Metrics output file was not generated" - -# sanitize_outputDir() +def test_train_segmentation_rad_2d(device): + print("03: Starting 2D Rad segmentation tests") + # read and parse csv + parameters = parseConfig( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = 3 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["data_preprocessing"]["resize_image"] = [224, 224] + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # read and initialize parameters for specific data dimension + for model in all_models_segmentation: + if model == "imagenet_unet": + # imagenet_unet encoder needs to be toned down for small patch size + parameters["model"]["encoder_name"] = "mit_b0" + parameters["model"]["encoder_depth"] = 3 + parameters["model"]["decoder_channels"] = (64, 32, 16) + parameters["model"]["final_layer"] = random.choice( + ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] + ) + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + + if model == "dynunet": + # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py + parameters["model"]["kernel_size"] = (3, 3, 3, 1) + parameters["model"]["strides"] = (1, 1, 1, 1) + parameters["model"]["deep_supervision"] = False + + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_sdnet_rad_2d(device): + 
print("04: Starting 2D Rad segmentation tests") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + # patch_size is custom for sdnet + parameters["patch_size"] = [224, 224, 1] + parameters["batch_size"] = 2 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["num_channels"] = 1 + parameters["model"]["architecture"] = "sdnet" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + sanitize_outputDir() + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_rad_3d(device): + print("05: Starting 3D Rad segmentation tests") + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["class_list"] = [0, 1] + parameters["model"]["final_layer"] = "softmax" + parameters["model"]["amp"] = True + parameters["in_memory"] = True + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_segmentation: + if model == "imagenet_unet": + # imagenet_unet encoder needs to be toned down for small patch size + parameters["model"]["encoder_name"] = "mit_b0" + with pytest.raises(Exception) as exc_info: + _ = global_models_dict[model](parameters) + print("Exception raised:", exc_info.value) + parameters["model"]["encoder_name"] = "resnet34" + parameters["model"]["encoder_depth"] = 3 + parameters["model"]["decoder_channels"] = (64, 32, 16) + parameters["model"]["final_layer"] = random.choice( + ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] + ) + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + + if model == "dynunet": + # More info: https://github.com/Project-MONAI/MONAI/blob/96bfda00c6bd290297f5e3514ea227c6be4d08b4/tests/test_dynunet.py + parameters["model"]["kernel_size"] = (3, 3, 3, 1) + parameters["model"]["strides"] = (1, 1, 1, 1) + parameters["model"]["deep_supervision"] = False + + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_regression_rad_2d(device): + print("06: Starting 2D Rad regression tests") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_regression.yaml", 
version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["amp"] = False + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_regression.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["scaling_factor"] = 1 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_regression: + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_regression_rad_2d_imagenet(device): + print("07: Starting 2D Rad regression tests for imagenet models") + # read and initialize parameters for specific data dimension + print("Starting 2D Rad regression tests for imagenet models") + parameters = ConfigManager( + testingDir + "/config_regression.yaml", version_check_flag=False + ) + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["amp"] = False + parameters["model"]["print_summary"] = False + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_regression.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["scaling_factor"] = 1 + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_classification: + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = 1 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_regression_brainage_rad_2d(device): + print("08: Starting brain age tests") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_regression.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["amp"] = False + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_regression.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["scaling_factor"] = 1 + parameters["model"]["architecture"] = "brain_age" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + # parameters_temp = copy.deepcopy(parameters) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + 
device=device, + resume=False, + reset=True, + ) + + # file_config_temp = write_temp_config_path(parameters_temp) + model_path = os.path.join(outputDir, "brain_age_best.pth.tar") + config_path = os.path.join(outputDir, "parameters.pkl") + optimization_result = post_training_model_optimization(model_path, config_path) + assert optimization_result == False, "Optimization should fail" + + sanitize_outputDir() + + print("passed") + + +def test_train_regression_rad_3d(device): + print("09: Starting 3D Rad regression tests") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_regression.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_regression.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["class_list"] = parameters["headers"]["predictionHeaders"] + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_regression: + if "efficientnet" in model: + parameters["patch_size"] = [16, 16, 16] + else: + parameters["patch_size"] = patch_size["3D"] + + if model == "imagenet_unet": + parameters["model"]["depth"] = 2 + parameters["model"]["decoder_channels"] = [32, 16] + parameters["model"]["encoder_weights"] = "None" + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_classification_rad_2d(device): + print("10: Starting 2D Rad classification tests") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["track_memory_usage"] = True + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_regression: + if model == "imagenet_unet": + parameters["model"]["depth"] = 2 + parameters["model"]["decoder_channels"] = [32, 16] + parameters["model"]["encoder_weights"] = "None" + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + 
reset=True, + ) + + # ensure sigmoid and softmax activations are tested for imagenet models + for activation_type in ["sigmoid", "softmax"]: + parameters["model"]["architecture"] = "imagenet_vgg11" + parameters["model"]["final_layer"] = activation_type + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_classification_rad_3d(device): + print("11: Starting 3D Rad classification tests") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + # loop through selected models and train for single epoch + for model in all_models_regression: + if "efficientnet" in model: + parameters["patch_size"] = [16, 16, 16] + else: + parameters["patch_size"] = patch_size["3D"] + if model == "imagenet_unet": + parameters["model"]["encoder_name"] = "efficientnet-b0" + parameters["model"]["depth"] = 1 + parameters["model"]["decoder_channels"] = [64] + parameters["model"]["final_layer"] = random.choice( + ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] + ) + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_resume_inference_classification_rad_3d(device): + print("12: Starting 3D Rad classification tests for resume and reset") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + model = all_models_regression[0] + parameters["model"]["architecture"] = model + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + ## testing resume with parameter updates + parameters["num_epochs"] = 2 + parameters["nested_training"]["testing"] = 
-5 + parameters["nested_training"]["validation"] = -5 + parameters["model"]["save_at_every_epoch"] = True + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=True, + reset=False, + ) + + ## testing resume without parameter updates + parameters["num_epochs"] = 1 + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=False, + ) + + parameters["output_dir"] = outputDir # this is in inference mode + InferenceManager( + dataframe=training_data, + modelDir=outputDir, + parameters=parameters, + device=device, + ) + sanitize_outputDir() + + print("passed") + + +def test_train_inference_optimize_classification_rad_3d(device): + print("13: Starting 3D Rad segmentation tests for optimization") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["architecture"] = all_models_regression[0] + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + # parameters_temp = copy.deepcopy(parameters) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + # file_config_temp = write_temp_config_path(parameters_temp) + model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") + config_path = os.path.join(outputDir, "parameters.pkl") + optimization_result = post_training_model_optimization(model_path, config_path) + assert optimization_result == True, "Optimization should pass" + + ## testing inference + for model_type in all_model_type: + parameters["model"]["type"] = model_type + parameters["output_dir"] = outputDir # this is in inference mode + InferenceManager( + dataframe=training_data, + modelDir=outputDir, + parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_inference_optimize_segmentation_rad_2d(device): + print("14: Starting 2D Rad segmentation tests for optimization") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["patch_size"] = patch_size["2D"] + parameters["modality"] = "rad" + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["save_output"] = True + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = ["dice"] + parameters["model"]["architecture"] = "resunet" + parameters["model"]["onnx_export"] = True + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, 
+ outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + ## testing inference + for model_type in all_model_type: + parameters["model"]["type"] = model_type + parameters["output_dir"] = outputDir # this is in inference mode + InferenceManager( + dataframe=training_data, + modelDir=outputDir, + parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_inference_classification_with_logits_single_fold_rad_3d(device): + print("15: Starting 3D Rad classification tests for single fold logits inference") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["final_layer"] = "logits" + # loop through selected models and train for single epoch + model = all_models_regression[0] + parameters["model"]["architecture"] = model + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + ## add stratified splitting + parameters["nested_training"]["stratified"] = True + + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # duplicate the data to test stratified sampling + training_data_duplicate = training_data._append(training_data) + for _ in range(1): + training_data_duplicate = training_data_duplicate._append( + training_data_duplicate + ) + training_data_duplicate.reset_index(drop=True, inplace=True) + # ensure subjects are not duplicated + training_data_duplicate["SubjectID"] = training_data_duplicate.index + + # ensure every part of the code is tested + for folds in [2, 1, -5]: + ## add stratified folding information + parameters["nested_training"]["testing"] = folds + parameters["nested_training"]["validation"] = folds if folds != 1 else -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data_duplicate, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + ## this is to test if inference can run without having ground truth column + training_data.drop("ValueToPredict", axis=1, inplace=True) + training_data.drop("Label", axis=1, inplace=True) + temp_infer_csv = os.path.join(outputDir, "temp_infer_csv.csv") + training_data.to_csv(temp_infer_csv, index=False) + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV(temp_infer_csv) + parameters["output_dir"] = outputDir # this is in inference mode + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["final_layer"] = "logits" + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + model = all_models_regression[0] + parameters["model"]["architecture"] = model + parameters["model"]["onnx_export"] = False + InferenceManager( + dataframe=training_data, + modelDir=outputDir, + 
parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_inference_classification_with_logits_multiple_folds_rad_3d(device): + print("16: Starting 3D Rad classification tests for multi-fold logits inference") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["final_layer"] = "logits" + # necessary for n-fold cross-validation inference + parameters["nested_training"]["validation"] = 2 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + model = all_models_regression[0] + parameters["model"]["architecture"] = model + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + parameters["output_dir"] = outputDir # this is in inference mode + InferenceManager( + dataframe=training_data, + modelDir=outputDir + "," + outputDir, + parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_scheduler_classification_rad_2d(device): + print("17: Starting 2D Rad segmentation tests for scheduler") + # read and initialize parameters for specific data dimension + # loop through selected models and train for single epoch + for scheduler in global_schedulers_dict: + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "densenet121" + parameters["model"]["norm_type"] = "instance" + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["scheduler"] = {} + parameters["scheduler"]["type"] = scheduler + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + ## ensure parameters are parsed every single time + file_config_temp = write_temp_config_path(parameters) + + parameters = ConfigManager(file_config_temp, version_check_flag=False) + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_optimizer_classification_rad_2d(device): + print("18: Starting 2D Rad classification tests for optimizer") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + 
parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "densenet121" + parameters["model"]["norm_type"] = "none" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for optimizer in global_optimizer_dict: + parameters["optimizer"] = {} + parameters["optimizer"]["type"] = optimizer + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.exists(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + Path(outputDir).mkdir(parents=True, exist_ok=True) + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_clip_train_classification_rad_3d(device): + print("19: Starting 3D Rad classification tests for clipping") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["architecture"] = "vgg16" + parameters["model"]["norm_type"] = "None" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for clip_mode in all_clip_modes: + parameters["clip_mode"] = clip_mode + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + sanitize_outputDir() + + print("passed") + + +def test_train_normtype_segmentation_rad_3d(device): + print("20: Starting 3D Rad segmentation tests for normtype") + # read and initialize parameters for specific data dimension + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["class_list"] = [0, 1] + parameters["model"]["amp"] = True + parameters["save_output"] = True + parameters["data_postprocessing"] = {"fill_holes"} + parameters["in_memory"] = True + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + + # these should raise exceptions + for norm_type in ["none", None]: + 
parameters["model"]["norm_type"] = norm_type + file_config_temp = write_temp_config_path(parameters) + with pytest.raises(Exception) as exc_info: + parameters = ConfigManager(file_config_temp, version_check_flag=False) + + print("Exception raised:", exc_info.value) + + # loop through selected models and train for single epoch + for norm in all_norm_types: + for model in ["resunet", "unet", "fcn", "unetr"]: + parameters["model"]["architecture"] = model + parameters["model"]["norm_type"] = norm + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + Path(outputDir).mkdir(parents=True, exist_ok=True) + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_metrics_segmentation_rad_2d(device): + print("21: Starting 2D Rad segmentation tests for metrics") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["data_postprocessing"] = {"mapping": {0: 0, 255: 1}} + parameters["model"]["amp"] = True + parameters["save_output"] = True + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = [ + "dice", + "hausdorff", + "hausdorff95", + "normalized_surface_dice", + "sensitivity", + "sensitivity_per_label", + "specificity_segmentation", + "specificity_segmentation_per_label", + "jaccard", + "jaccard_per_label", + ] + parameters["model"]["architecture"] = "resunet" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + file_config_temp = write_temp_config_path(parameters) + + parameters = ConfigManager(file_config_temp, version_check_flag=False) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_metrics_regression_rad_2d(device): + print("22: Starting 2D Rad regression tests for metrics") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_regression.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_regression.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["norm_type"] = "instance" + parameters["model"]["amp"] = False + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "vgg11" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = True + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def 
test_train_losses_segmentation_rad_2d(device):
+    print("23: Starting 2D Rad segmentation tests for losses")
+
+    # helper function to read and parse yaml and return parameters
+    def get_parameters_after_alteration(loss_type: str) -> tuple:
+        parameters = ConfigManager(
+            testingDir + "/config_segmentation.yaml", version_check_flag=False
+        )
+        parameters["loss_function"] = loss_type
+        file_config_temp = write_temp_config_path(parameters)
+        # read and parse csv
+        parameters = ConfigManager(file_config_temp, version_check_flag=True)
+        parameters["nested_training"]["testing"] = -5
+        parameters["nested_training"]["validation"] = -5
+        training_data, parameters["headers"] = parseTrainingCSV(
+            inputDir + "/train_2d_rad_segmentation.csv"
+        )
+        parameters["modality"] = "rad"
+        parameters["patch_size"] = patch_size["2D"]
+        parameters["model"]["dimension"] = 2
+        parameters["model"]["class_list"] = [0, 255]
+        # disabling amp because some losses do not support Half, yet
+        parameters["model"]["amp"] = False
+        parameters["model"]["num_channels"] = 3
+        parameters["model"]["architecture"] = "resunet"
+        parameters["metrics"] = ["dice"]
+        parameters["model"]["onnx_export"] = False
+        parameters["model"]["print_summary"] = False
+        parameters = populate_header_in_parameters(parameters, parameters["headers"])
+        return parameters, training_data
+
+    # loop through selected models and train for single epoch
+    for loss_type in [
+        "dc",
+        "dc_log",
+        "dcce",
+        "dcce_logits",
+        "tversky",
+        "focal",
+        "dc_focal",
+        "mcc",
+        "mcc_log",
+    ]:
+        parameters, training_data = get_parameters_after_alteration(loss_type)
+        sanitize_outputDir()
+        TrainingManager(
+            dataframe=training_data,
+            outputDir=outputDir,
+            parameters=parameters,
+            device=device,
+            resume=False,
+            reset=True,
+        )
+
+    sanitize_outputDir()
+
+    print("passed")
+
+
+def test_generic_config_read():
+    print("24: Starting testing reading configuration")
+    parameters = ConfigManager(
+        os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False
+    )
+    parameters["data_preprocessing"]["resize_image"] = [128, 128]
+
+    file_config_temp = write_temp_config_path(parameters)
+
+    # read and parse csv
+    parameters = ConfigManager(file_config_temp, version_check_flag=True)
+
+    training_data, parameters["headers"] = parseTrainingCSV(
+        inputDir + "/train_2d_rad_segmentation.csv"
+    )
+    assert parameters is not None, "parameters is None"
+    data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test")
+    assert data_loader is not None, "data_loader is None"
+
+    os.remove(file_config_temp)
+
+    # ensure resize_image is triggered
+    parameters["data_preprocessing"].pop("resample")
+    parameters["data_preprocessing"].pop("resample_min")
+    parameters["data_preprocessing"]["resize_image"] = [128, 128]
+    parameters["model"]["print_summary"] = False
+
+    with open(file_config_temp, "w") as file:
+        yaml.dump(parameters,
file) + + parameters = ConfigManager(file_config_temp, version_check_flag=True) + + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + assert parameters is not None, "parameters is None" + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + assert data_loader is not None, "data_loader is None" + + os.remove(file_config_temp) + + # ensure resize_image is triggered + parameters["data_preprocessing"].pop("resize_patch") + parameters["data_preprocessing"]["resize"] = [64, 64] + + with open(file_config_temp, "w") as file: + yaml.dump(parameters, file) + + parameters = ConfigManager(file_config_temp, version_check_flag=True) + + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + assert parameters is not None, "parameters is None" + data_loader = ImagesFromDataFrame(training_data, parameters, True, "unit_test") + assert data_loader is not None, "data_loader is None" + + os.remove(file_config_temp) + + sanitize_outputDir() + + print("passed") + + +def test_generic_cli_function_preprocess(): + print("25: Starting testing cli function preprocess") + file_config = os.path.join(testingDir, "config_segmentation.yaml") + sanitize_outputDir() + file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") + + input_data_df, _ = parseTrainingCSV(file_data, train=False) + # add random metadata to ensure it gets preserved + input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] + input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) + input_data_df["metadata_test_int"] = np.random.randint( + 0, 100, input_data_df.shape[0] + ) + temp_csv = os.path.join(outputDir, "temp.csv") + input_data_df.to_csv(temp_csv) + + parameters = ConfigManager(file_config) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = "[0, 255||125]" + # disabling amp because some losses do not support Half, yet + parameters["model"]["amp"] = False + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "unet" + parameters["metrics"] = ["dice"] + parameters["patch_sampler"] = { + "type": "label", + "enable_padding": True, + "biased_sampling": True, + } + parameters["weighted_loss"] = True + parameters["save_output"] = True + parameters["data_preprocessing"]["to_canonical"] = None + parameters["data_preprocessing"]["rgba_to_rgb"] = None + + file_config_temp = write_temp_config_path(parameters) + + preprocess_and_save(temp_csv, file_config_temp, outputDir) + training_data, parameters["headers"] = parseTrainingCSV( + outputDir + "/data_processed.csv" + ) + + # check that the length of training data is what we expect + assert ( + len(training_data) == input_data_df.shape[0] + ), "Number of subjects in dataframe is not same as that of input dataframe" + assert ( + len(training_data.columns) == len(input_data_df.columns) + 1 + ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column + sanitize_outputDir() + + ## regression/classification preprocess + file_config = os.path.join(testingDir, "config_regression.yaml") + parameters = ConfigManager(file_config) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["amp"] = False + # read and 
parse csv + parameters["model"]["num_channels"] = 3 + parameters["scaling_factor"] = 1 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["data_preprocessing"]["to_canonical"] = None + parameters["data_preprocessing"]["rgba_to_rgb"] = None + file_data = os.path.join(inputDir, "train_2d_rad_regression.csv") + input_data_df, _ = parseTrainingCSV(file_data, train=False) + # add random metadata to ensure it gets preserved + input_data_df["metadata_test_string"] = input_data_df.shape[0] * ["test"] + input_data_df["metadata_test_float"] = np.random.rand(input_data_df.shape[0]) + input_data_df["metadata_test_int"] = np.random.randint( + 0, 100, input_data_df.shape[0] + ) + input_data_df.to_csv(temp_csv) + + # store this separately for preprocess testing + with open(file_config_temp, "w") as outfile: + yaml.dump(parameters, outfile, default_flow_style=False) + + preprocess_and_save(temp_csv, file_config_temp, outputDir) + training_data, parameters["headers"] = parseTrainingCSV( + outputDir + "/data_processed.csv" + ) + + # check that the length of training data is what we expect + assert ( + len(training_data) == input_data_df.shape[0] + ), "Number of subjects in dataframe is not same as that of input dataframe" + assert ( + len(training_data.columns) == len(input_data_df.columns) + 1 + ), "Number of columns in output dataframe is not same as that of input dataframe" # the +1 is for the added index column + sanitize_outputDir() + + print("passed") + + +def test_generic_cli_function_mainrun(device): + print("26: Starting testing cli function main_run") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["num_epochs"] = 1 + parameters["nested_training"]["testing"] = 1 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = ["dice"] + parameters["model"]["architecture"] = "unet" + + file_config_temp = write_temp_config_path(parameters) + + file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") + + main_run( + file_data, file_config_temp, outputDir, True, device, resume=False, reset=True + ) + sanitize_outputDir() + + with open(file_config_temp, "w") as file: + yaml.dump(parameters, file) + + # testing train/valid split + main_run( + file_data + "," + file_data, + file_config_temp, + outputDir, + True, + device, + resume=False, + reset=True, + ) + + with open(file_config_temp, "w") as file: + yaml.dump(parameters, file) + + # testing train/valid/test split with resume + main_run( + file_data + "," + file_data + "," + file_data, + file_config_temp, + outputDir, + True, + device, + resume=True, + reset=False, + ) + sanitize_outputDir() + + print("passed") + + +def test_dataloader_construction_train_segmentation_3d(device): + print("27: Starting 3D Rad segmentation tests") + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + params_all_preprocessing_and_augs = ConfigManager( + os.path.join(baseConfigDir, "config_all_options.yaml") + ) + + # take preprocessing and augmentations from all options + for key in ["data_preprocessing", "data_augmentation"]: + parameters[key] = 
params_all_preprocessing_and_augs[key] + + # customize parameters to maximize test coverage + parameters["data_preprocessing"].pop("normalize", None) + parameters["data_preprocessing"]["normalize_nonZero"] = None + parameters["data_preprocessing"]["default_probability"] = 1 + parameters.pop("nested_training", None) + parameters["nested_training"] = {} + parameters["nested_training"]["testing"] = 1 + parameters["nested_training"]["validation"] = -5 + + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["save_training"] = True + parameters["save_output"] = True + parameters["model"]["dimension"] = 3 + parameters["model"]["class_list"] = [0, 1] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["architecture"] = "unet" + parameters["weighted_loss"] = False + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["data_postprocessing"]["mapping"] = {0: 0, 1: 1} + parameters["data_postprocessing"]["fill_holes"] = True + parameters["data_postprocessing"]["cca"] = True + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_generic_preprocess_functions(): + print("28: Starting testing preprocessing functions") + # initialize an input which has values between [-1,1] + # checking tensor with last dimension of size 1 + input_tensor = torch.rand(4, 256, 256, 1) + input_transformed = global_preprocessing_dict["rgba2rgb"]()(input_tensor) + assert input_transformed.shape[0] == 3, "Number of channels is not 3" + assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + + input_tensor = torch.rand(3, 256, 256, 1) + input_transformed = global_preprocessing_dict["rgb2rgba"]()(input_tensor) + assert input_transformed.shape[0] == 4, "Number of channels is not 4" + assert input_transformed.shape[1:] == input_tensor.shape[1:], "Shape mismatch" + + input_tensor = 2 * torch.rand(3, 256, 256, 1) - 1 + input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) + input_tensor = 2 * torch.rand(1, 3, 256, 256) - 1 + input_transformed = global_preprocessing_dict["normalize_imagenet"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_standardize"](input_tensor) + input_transformed = global_preprocessing_dict["normalize_div_by_255"](input_tensor) + parameters_dict = {} + parameters_dict["min"] = 0.25 + parameters_dict["max"] = 0.75 + input_transformed = global_preprocessing_dict["threshold"](parameters_dict)( + input_tensor + ) + assert ( + torch.count_nonzero( + input_transformed[input_transformed < parameters_dict["min"]] + > parameters_dict["max"] + ) + == 0 + ), "Input should be thresholded" + + input_transformed = global_preprocessing_dict["clip"](parameters_dict)(input_tensor) + assert ( + torch.count_nonzero( + input_transformed[input_transformed < parameters_dict["min"]] + > parameters_dict["max"] + ) + == 0 + ), "Input should be clipped" + + non_zero_normalizer = global_preprocessing_dict["normalize_nonZero_masked"] + input_transformed = 
non_zero_normalizer(input_tensor) + non_zero_normalizer = global_preprocessing_dict["normalize_positive"] + input_transformed = non_zero_normalizer(input_tensor) + non_zero_normalizer = global_preprocessing_dict["normalize_nonZero"] + input_transformed = non_zero_normalizer(input_tensor) + + ## stain_normalization checks + input_tensor = 2 * torch.rand(3, 256, 256, 1) + 10 + training_data, _ = parseTrainingCSV(inputDir + "/train_2d_rad_segmentation.csv") + parameters_temp = {} + parameters_temp["data_preprocessing"] = {} + parameters_temp["data_preprocessing"]["stain_normalizer"] = { + "target": training_data["Channel_0"][0] + } + for extractor in ["ruifrok", "macenko", "vahadane"]: + parameters_temp["data_preprocessing"]["stain_normalizer"][ + "extractor" + ] = extractor + non_zero_normalizer = global_preprocessing_dict["stain_normalizer"]( + parameters_temp["data_preprocessing"]["stain_normalizer"] + ) + input_transformed = non_zero_normalizer(input_tensor) + + ## histogram matching tests + # histogram equalization + input_tensor = torch.rand(1, 64, 64, 64) + parameters_temp = {} + parameters_temp["data_preprocessing"] = {} + parameters_temp["data_preprocessing"]["histogram_matching"] = {} + non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( + parameters_temp["data_preprocessing"]["histogram_matching"] + ) + input_transformed = non_zero_normalizer(input_tensor) + # adaptive histogram equalization + parameters_temp = {} + parameters_temp["data_preprocessing"] = {} + parameters_temp["data_preprocessing"]["histogram_matching"] = {"target": "adaptive"} + non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( + parameters_temp["data_preprocessing"]["histogram_matching"] + ) + input_transformed = non_zero_normalizer(input_tensor) + # histogram matching + training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_segmentation.csv") + parameters_temp = {} + parameters_temp["data_preprocessing"] = {} + parameters_temp["data_preprocessing"]["histogram_matching"] = { + "target": training_data["Channel_0"][0] + } + non_zero_normalizer = global_preprocessing_dict["histogram_matching"]( + parameters_temp["data_preprocessing"]["histogram_matching"] + ) + input_transformed = non_zero_normalizer(input_tensor) + + # fill holes + input_tensor = torch.rand(1, 256, 256, 256) > 0.5 + input_transformed = fill_holes(input_tensor) + + ## CCA tests + # 3d + input_tensor = torch.rand(1, 256, 256, 256) > 0.5 + input_transformed = cca(input_tensor) + # 2d + input_tensor = torch.rand(1, 256, 256) > 0.5 + input_transformed = cca(input_tensor) + # 2d rgb + input_tensor = torch.rand(1, 3, 256, 256) > 0.5 + input_transformed = cca(input_tensor) + + input_tensor = torch.rand(1, 256, 256, 256) + cropper = global_preprocessing_dict["crop_external_zero_planes"]( + patch_size=[128, 128, 128] + ) + input_transformed = cropper(input_tensor) + + cropper = global_preprocessing_dict["crop"]([64, 64, 64]) + input_transformed = cropper(input_tensor) + assert input_transformed.shape == (1, 128, 128, 128), "Cropping should work" + + cropper = global_preprocessing_dict["centercrop"]([128, 128, 128]) + input_transformed = cropper(input_tensor) + assert input_transformed.shape == (1, 128, 128, 128), "Center-crop should work" + + # test pure morphological operations + input_tensor_3d = torch.rand(1, 1, 256, 256, 256) + input_tensor_2d = torch.rand(1, 3, 256, 256) + for mode in ["dilation", "erosion", "opening", "closing"]: + input_transformed_3d = torch_morphological(input_tensor_3d, mode=mode) + 
assert len(input_transformed_3d.shape) == 5, "Output should be 5D" + input_transformed_2d = torch_morphological(input_tensor_2d, mode=mode) + assert len(input_transformed_2d.shape) == 4, "Output should be 4D" + + # test for failure + with pytest.raises(Exception) as exc_info: + input_tensor_4d = torch.rand(1, 1, 32, 32, 32, 32) + input_transformed_3d = torch_morphological(input_tensor_4d) + + print("Exception raised:", exc_info.value) + + # test obtaining arrays + input_tensor_3d = torch.rand(256, 256, 256) + input_array = get_array_from_image_or_tensor(input_tensor_3d) + assert isinstance(input_array, np.ndarray), "Array should be obtained from tensor" + input_image = sitk.GetImageFromArray(input_array) + input_array = get_array_from_image_or_tensor(input_image) + assert isinstance(input_array, np.ndarray), "Array should be obtained from image" + input_array = get_array_from_image_or_tensor(input_array) + assert isinstance(input_array, np.ndarray), "Array should be obtained from array" + + with pytest.raises(Exception) as exc_info: + input_list = [0, 1] + input_array = get_array_from_image_or_tensor(input_list) + exception_raised = exc_info.value + print("Exception raised: ", exception_raised) + + ## image rescaling test + input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) + # try out different options + for params in [ + {}, + None, + {"in_min_max": [5, 250], "out_min_max": [-1, 2]}, + {"out_min_max": [0, 1], "percentiles": [5, 95]}, + ]: + rescaler = global_preprocessing_dict["rescale"](params) + input_transformed = rescaler(input_tensor) + assert ( + input_transformed.min() >= rescaler.out_min_max[0] + ), "Rescaling should work for min" + assert ( + input_transformed.max() <= rescaler.out_min_max[1] + ), "Rescaling should work for max" + + # tests for histology alpha check + input_tensor = torch.randint(0, 256, (1, 64, 64, 64)) + _ = get_nonzero_percent(input_tensor) + assert not ( + alpha_rgb_2d_channel_check(input_tensor) + ), "Alpha channel check should work for 4D tensors" + input_tensor = torch.randint(0, 256, (64, 64, 64)) + assert not ( + alpha_rgb_2d_channel_check(input_tensor) + ), "Alpha channel check should work for 3D images" + input_tensor = torch.randint(0, 256, (64, 64, 4)) + assert not ( + alpha_rgb_2d_channel_check(input_tensor) + ), "Alpha channel check should work for generic 4D images" + input_tensor = torch.randint(0, 256, (64, 64)) + assert alpha_rgb_2d_channel_check( + input_tensor + ), "Alpha channel check should work for grayscale 2D images" + input_tensor = torch.randint(0, 256, (64, 64, 3)) + assert alpha_rgb_2d_channel_check( + input_tensor + ), "Alpha channel check should work for RGB images" + input_tensor = torch.randint(0, 256, (64, 64, 4)) + input_tensor[:, :, 3] = 255 + assert alpha_rgb_2d_channel_check( + input_tensor + ), "Alpha channel check should work for RGBA images" + input_array = torch.randint(0, 256, (64, 64, 3)).numpy() + temp_filename = os.path.join(outputDir, "temp.png") + cv2.imwrite(temp_filename, input_array) + temp_filename_tiff = convert_to_tiff(temp_filename, outputDir) + assert os.path.exists(temp_filename_tiff), "Tiff file should be created" + + # resize tests + input_tensor = np.random.randint(0, 255, size=(20, 20, 20)) + input_image = sitk.GetImageFromArray(input_tensor) + expected_output = (10, 10, 10) + input_transformed = resize_image(input_image, expected_output) + assert input_transformed.GetSize() == expected_output, "Resize should work" + input_tensor = np.random.randint(0, 255, size=(20, 20)) + input_image = 
sitk.GetImageFromArray(input_tensor) + expected_output = [10, 10] + output_size_dict = {"resize": expected_output} + input_transformed = resize_image(input_image, output_size_dict) + assert list(input_transformed.GetSize()) == expected_output, "Resize should work" + + sanitize_outputDir() + + print("passed") + + +def test_generic_augmentation_functions(): + print("29: Starting testing augmentation functions") + params_all_preprocessing_and_augs = ConfigManager( + os.path.join(baseConfigDir, "config_all_options.yaml") + ) + + # this is for rgb augmentation + input_tensor = torch.rand(3, 128, 128, 1) + temp = global_augs_dict["colorjitter"]( + params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] + ) + output_tensor = None + output_tensor = temp(input_tensor) + assert output_tensor != None, "RGB Augmentation should work" + + # ensuring all code paths are covered + for key in ["brightness", "contrast", "saturation", "hue"]: + params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"][ + key + ] = 0.25 + temp = global_augs_dict["colorjitter"]( + params_all_preprocessing_and_augs["data_augmentation"]["colorjitter"] + ) + output_tensor = None + output_tensor = temp(input_tensor) + assert output_tensor != None, "RGB Augmentation should work" + + # testing HED transforms with different options + input_tensor = torch.rand(3, 128, 128, 1) + params = { + "data_augmentation": { + "hed_transform": {}, + # "hed_transform_light": {}, + # "hed_transform_heavy": {}, + } + } + temp = global_augs_dict["hed_transform"]( + params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] + ) + ranges = [ + "haematoxylin_bias_range", + "eosin_bias_range", + "dab_bias_range", + "haematoxylin_sigma_range", + "eosin_sigma_range", + "dab_sigma_range", + ] + + default_range = [-0.1, 0.1] + for key in ranges: + params["data_augmentation"]["hed_transform"].setdefault(key, default_range) + + params["data_augmentation"]["hed_transform"].setdefault( + "cutoff_range", [0.05, 0.95] + ) + + # Check if the params are correctly set for each augmentation type + assert params["data_augmentation"]["hed_transform"] == { + "haematoxylin_bias_range": [-0.1, 0.1], + "eosin_bias_range": [-0.1, 0.1], + "dab_bias_range": [-0.1, 0.1], + "haematoxylin_sigma_range": [-0.1, 0.1], + "eosin_sigma_range": [-0.1, 0.1], + "dab_sigma_range": [-0.1, 0.1], + "cutoff_range": [0.05, 0.95], + } + temp = global_augs_dict["hed_transform"]( + params_all_preprocessing_and_augs["data_augmentation"]["hed_transform"] + ) + output_tensor = None + output_tensor = temp(input_tensor) + assert output_tensor != None, "HED Augmentation should work" + + # this is for all other augmentations + input_tensor = torch.rand(3, 128, 128, 128) + for aug in params_all_preprocessing_and_augs["data_augmentation"]: + aug_lower = aug.lower() + output_tensor = None + if aug_lower in global_augs_dict: + output_tensor = global_augs_dict[aug]( + params_all_preprocessing_and_augs["data_augmentation"][aug_lower] + )(input_tensor) + assert output_tensor != None, "Augmentation should work" + + # additional test for elastic + params_elastic = params_all_preprocessing_and_augs["data_augmentation"]["elastic"] + for key_to_pop in ["num_control_points", "max_displacement", "locked_borders"]: + params_elastic.pop(key_to_pop, None) + output_tensor = global_augs_dict["elastic"](params_elastic)(input_tensor) + assert output_tensor != None, "Augmentation for base elastic transform should work" + + sanitize_outputDir() + + print("passed") + + +def 
test_train_checkpointing_segmentation_rad_2d(device): + print("30: Starting 2D Rad segmentation tests for metrics") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + parameters["patch_sampler"] = { + "type": "label", + "enable_padding": True, + "biased_sampling": True, + } + file_config_temp = write_temp_config_path(parameters) + parameters = ConfigManager(file_config_temp, version_check_flag=False) + + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["num_epochs"] = 1 + parameters["nested_training"]["testing"] = 1 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = [ + "dice", + "dice_per_label", + "hausdorff", + "hausdorff95", + "hd95_per_label", + "hd100_per_label", + "normalized_surface_dice", + "normalized_surface_dice_per_label", + "sensitivity", + "sensitivity_per_label", + "specificity_segmentation", + "specificity_segmentation_per_label", + "jaccard", + "jaccard_per_label", + ] + parameters["model"]["architecture"] = "unet" + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + parameters["num_epochs"] = 2 + parameters["nested_training"]["validation"] = -2 + parameters["nested_training"]["testing"] = 1 + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=False, + ) + + sanitize_outputDir() + + print("passed") + + +def test_generic_model_patch_divisibility(): + print("31: Starting patch divisibility tests") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + _, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["model"]["architecture"] = "unet" + parameters["patch_size"] = [127, 127, 1] + parameters["num_epochs"] = 1 + parameters["nested_training"]["testing"] = 1 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = ["dice"] + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + + # this assertion should fail + with pytest.raises(BaseException) as _: + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + parameters["model"]["architecture"] = "uinc" + parameters["model"]["base_filters"] = 11 + + # this assertion should fail + with pytest.raises(BaseException) as _: + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + sanitize_outputDir() + + print("passed") + + +def test_generic_one_hot_logic(): + print("32: Starting one hot logic tests") + random_array = np.random.randint(5, size=(20, 20, 20)) + img = sitk.GetImageFromArray(random_array) + img_tensor = get_tensor_from_image(img).to(torch.float16) + img_tensor = img_tensor.unsqueeze(0).unsqueeze(0) + + class_list = [*range(0, 
np.max(random_array) + 1)]
+    img_tensor_oh = one_hot(img_tensor, class_list)
+    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+    comparison = random_array == img_tensor_oh_rev_array
+    assert comparison.all(), "Arrays are not equal"
+
+    class_list = ["0", "1||2||3", np.max(random_array)]
+    img_tensor_oh = one_hot(img_tensor, class_list)
+    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+
+    # check for background
+    comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0)
+    assert comparison.all(), "Arrays at '0' are not equal"
+
+    # check last foreground
+    comparison = (random_array == np.max(random_array)) == (
+        img_tensor_oh_rev_array == len(class_list) - 1
+    )
+    assert comparison.all(), "Arrays at final foreground are not equal"
+
+    # check combined foreground
+    combined_array = np.logical_or(
+        np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3)
+    )
+    comparison = combined_array == (img_tensor_oh_rev_array == 1)
+    assert comparison.all(), "Arrays at the combined foreground are not equal"
+
+    parameters = {"data_postprocessing": {}}
+    mapped_output = get_mapped_label(
+        torch.from_numpy(img_tensor_oh_rev_array), parameters
+    )
+
+    parameters = {}
+    mapped_output = get_mapped_label(
+        torch.from_numpy(img_tensor_oh_rev_array), parameters
+    )
+
+    parameters = {"data_postprocessing": {"mapping": {0: 0, 1: 1, 2: 5}}}
+    mapped_output = get_mapped_label(
+        torch.from_numpy(img_tensor_oh_rev_array), parameters
+    )
+
+    for key, value in parameters["data_postprocessing"]["mapping"].items():
+        comparison = (img_tensor_oh_rev_array == key) == (mapped_output == value)
+        assert comparison.all(), "Arrays at {}:{} are not equal".format(key, value)
+
+    # check the case where 0 is present as an int in a special case
+    class_list = [0, "1||2||3", np.max(random_array)]
+    img_tensor_oh = one_hot(img_tensor, class_list)
+    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+
+    # check for background
+    comparison = (random_array == 0) == (img_tensor_oh_rev_array == 0)
+    assert comparison.all(), "Arrays at '0' are not equal"
+
+    # check the case where 0 is absent from class_list
+    class_list = ["1||2||3", np.max(random_array)]
+    img_tensor_oh = one_hot(img_tensor, class_list)
+    img_tensor_oh_rev_array = reverse_one_hot(img_tensor_oh[0], class_list)
+
+    # check last foreground
+    comparison = (random_array == np.max(random_array)) == (
+        img_tensor_oh_rev_array == len(class_list)
+    )
+    assert comparison.all(), "Arrays at final foreground are not equal"
+
+    # check combined foreground
+    combined_array = np.logical_or(
+        np.logical_or((random_array == 1), (random_array == 2)), (random_array == 3)
+    )
+    comparison = combined_array == (img_tensor_oh_rev_array == 1)
+    assert comparison.all(), "Arrays at the combined foreground are not equal"
+
+    sanitize_outputDir()
+
+    print("passed")
+
+
+def test_generic_anonymizer():
+    print("33: Starting anonymizer tests")
+    input_file = get_testdata_file("MR_small.dcm")
+
+    output_file = os.path.join(outputDir, "MR_small_anonymized.dcm")
+
+    config_file = os.path.join(baseConfigDir, "config_anonymizer.yaml")
+    run_anonymizer(input_file, output_file, config_file, "rad")
+    assert os.path.exists(output_file), "Anonymized file does not exist"
-# def test_generic_deploy_metrics_docker():
-#     print("50: Testing deployment of a metrics generator to Docker")
-#     # requires an installed Docker engine
+    # test defaults
+    run_anonymizer(input_file, output_file, None, "rad")
+    assert
os.path.exists(output_file), "Anonymized file does not exist"
-#     deploymentOutputDir = os.path.join(outputDir, "mlcube")
-
-#     result = run_deployment(
-#         os.path.join(gandlfRootDir, "mlcube/model_mlcube/"),
-#         deploymentOutputDir,
-#         "docker",
-#         "metrics",
-#     )
+    # test nifti conversion
+    config_file_for_nifti = os.path.join(outputDir, "config_anonymizer_nifti.yaml")
+    with open(config_file, "r") as file_data:
+        yaml_data = file_data.read()
+    parameters = yaml.safe_load(yaml_data)
+    parameters["convert_to_nifti"] = True
+    with open(config_file_for_nifti, "w") as file:
+        yaml.dump(parameters, file)
-#     assert result, "run_deployment returned false"
-#     sanitize_outputDir()
+    # for nifti conversion, the input needs to be in a dir
+    input_folder_for_nifti = os.path.join(outputDir, "nifti_input")
+    Path(input_folder_for_nifti).mkdir(parents=True, exist_ok=True)
+    shutil.copyfile(input_file, os.path.join(input_folder_for_nifti, "MR_small.dcm"))
-#     print("passed")
+    output_file = os.path.join(outputDir, "MR_small.nii.gz")
+    run_anonymizer(input_folder_for_nifti, output_file, config_file_for_nifti, "rad")
+    assert os.path.exists(output_file), "Anonymized file does not exist"
-# def test_generic_data_split():
-#     print("51: Starting test for splitting and saving CSVs")
-#     # read and initialize parameters for specific data dimension
-#     parameters = ConfigManager(
-#         testingDir + "/config_classification.yaml", version_check_flag=False
-#     )
-#     parameters["nested_training"] = {"testing": 5, "validation": 5, "stratified": True}
-#     # read and parse csv
-#     training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_classification.csv")
-#     # duplicate the data to test stratified sampling
-#     training_data_duplicate = training_data._append(training_data)
-#     for _ in range(1):
-#         training_data_duplicate = training_data_duplicate._append(
-#             training_data_duplicate
-#         )
-#     training_data_duplicate.reset_index(drop=True, inplace=True)
-#     # ensure subjects are not duplicated
-#     training_data_duplicate["SubjectID"] = training_data_duplicate.index
-
-#     sanitize_outputDir()
-
-#     split_data_and_save_csvs(training_data_duplicate, outputDir, parameters)
-
-#     files_in_outputDir = os.listdir(outputDir)
-#     assert len(files_in_outputDir) == 15, "CSVs were not split correctly"
-
-#     sanitize_outputDir()
-
-#     print("passed")
+    if not os.path.exists(output_file):
+        raise Exception("Output NIfTI file was not created")
+
+    input_file = os.path.join(inputDir, "2d_histo_segmentation", "1", "image.tiff")
+    output_file_histo = os.path.join(outputDir, "histo_anon.tiff")
+    # this assertion should fail since histo anonymizer is not implemented
+    with pytest.raises(BaseException) as exc_info:
+        run_anonymizer(input_folder_for_nifti, output_file_histo, None, "histo")
+        assert os.path.exists(output_file_histo), "Anonymized file does not exist"
+    print("Exception raised: ", exc_info.value)
+    sanitize_outputDir()
+
+    print("passed")
+
+
+def test_train_inference_segmentation_histology_2d(device):
+    print("34: Starting histology train/inference segmentation tests")
+    # overwrite previous results
+    sanitize_outputDir()
+    output_dir_patches = os.path.join(outputDir, "histo_patches")
+    if os.path.isdir(output_dir_patches):
+        shutil.rmtree(output_dir_patches)
+    Path(output_dir_patches).mkdir(parents=True, exist_ok=True)
+    output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output")
+    Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True)
+
+    parameters_patch = {}
+    # extracting minimal number of patches to
ensure that the test does not take too long + parameters_patch["num_patches"] = 10 + parameters_patch["read_type"] = "sequential" + # define patches to be extracted in terms of microns + parameters_patch["patch_size"] = ["1000m", "1000m"] + + file_config_temp = write_temp_config_path(parameters_patch) + + patch_extraction( + inputDir + "/train_2d_histo_segmentation.csv", + output_dir_patches_output, + file_config_temp, + ) + + file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) + parameters["patch_size"] = patch_size["2D"] + parameters["modality"] = "histo" + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = 3 + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["model"]["architecture"] = "resunet" + parameters["nested_training"]["testing"] = 1 + parameters["nested_training"]["validation"] = -2 + parameters["metrics"] = ["dice"] + parameters["model"]["onnx_export"] = True + parameters["model"]["print_summary"] = True + parameters["data_preprocessing"]["resize_image"] = [128, 128] + modelDir = os.path.join(outputDir, "modelDir") + Path(modelDir).mkdir(parents=True, exist_ok=True) + TrainingManager( + dataframe=training_data, + outputDir=modelDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + inference_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_histo_segmentation.csv", train=False + ) + inference_data.drop(index=inference_data.index[-1], axis=0, inplace=True) + InferenceManager( + dataframe=inference_data, + modelDir=modelDir, + parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_inference_classification_histology_large_2d(device): + print( + "35: Starting histology train/inference classification tests for large images to check exception handling" + ) + # overwrite previous results + sanitize_outputDir() + output_dir_patches = os.path.join(outputDir, "histo_patches") + if os.path.isdir(output_dir_patches): + shutil.rmtree(output_dir_patches) + Path(output_dir_patches).mkdir(parents=True, exist_ok=True) + output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) + + for sub in ["1", "2"]: + file_to_check = os.path.join( + inputDir, "2d_histo_segmentation", sub, "image_resize.tiff" + ) + if os.path.exists(file_to_check): + os.remove(file_to_check) + + parameters_patch = {} + # extracting minimal number of patches to ensure that the test does not take too long + parameters_patch["num_patches"] = 3 + parameters_patch["patch_size"] = [128, 128] + parameters_patch["value_map"] = {0: 0, 255: 255} + + file_config_temp = write_temp_config_path(parameters_patch) + + patch_extraction( + inputDir + "/train_2d_histo_classification.csv", + output_dir_patches_output, + file_config_temp, + ) + + # resize the image + input_df, _ = parseTrainingCSV( + inputDir + "/train_2d_histo_classification.csv", train=False + ) + files_to_delete = [] + + def resize_for_ci(filename, scale): + """ + Helper function to resize images in CI + + Args: + filename (str): Filename of the image to be resized + scale (float): Scale factor to resize the 
image + + Returns: + str: Filename of the resized image + """ + new_filename = filename.replace(".tiff", "_resize.tiff") + try: + img = cv2.imread(filename) + dims = img.shape + img_resize = cv2.resize(img, (dims[1] * scale, dims[0] * scale)) + cv2.imwrite(new_filename, img_resize) + except Exception as ex1: + # this is only used in CI + print("Trying vips:", ex1) + try: + os.system( + "vips resize " + filename + " " + new_filename + " " + str(scale) + ) + except Exception as ex2: + print("Resize could not be done:", ex2) + return new_filename + + for _, row in input_df.iterrows(): + # ensure opm mask size check is triggered + _, _ = generate_initial_mask(resize_for_ci(row["Channel_0"], scale=2), 1) + + for patch_size in [ + [128, 128], + "[100m,100m]", + "[100mx100m]", + "[100mX100m]", + "[100m*100m]", + ]: + _ = get_patch_size_in_microns(row["Channel_0"], patch_size) + + # try to break resizer + new_filename = resize_for_ci(row["Channel_0"], scale=10) + row["Channel_0"] = new_filename + files_to_delete.append(new_filename) + # we do not need the last subject + break + + resized_inference_data_list = os.path.join( + inputDir, "train_2d_histo_classification_resize.csv" + ) + # drop last subject + input_df.drop(index=input_df.index[-1], axis=0, inplace=True) + input_df.to_csv(resized_inference_data_list, index=False) + files_to_delete.append(resized_inference_data_list) + + file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + temp_df = pd.read_csv(file_for_Training) + temp_df.drop("Label", axis=1, inplace=True) + temp_df["valuetopredict"] = np.random.randint(2, size=len(temp_df)) + temp_df.to_csv(file_for_Training, index=False) + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "histo" + parameters["patch_size"] = parameters_patch["patch_size"][0] + file_config_temp = write_temp_config_path(parameters) + parameters = ConfigManager(file_config_temp, version_check_flag=False) + parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "densenet121" + parameters["model"]["norm_type"] = "none" + parameters["data_preprocessing"]["rgba2rgb"] = "" + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["nested_training"]["testing"] = 1 + parameters["nested_training"]["validation"] = -2 + parameters["model"]["print_summary"] = False + modelDir = os.path.join(outputDir, "modelDir") + if os.path.isdir(modelDir): + shutil.rmtree(modelDir) + Path(modelDir).mkdir(parents=True, exist_ok=True) + TrainingManager( + dataframe=training_data, + outputDir=modelDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + parameters["output_dir"] = modelDir # this is in inference mode + parameters["data_preprocessing"]["resize_patch"] = parameters_patch["patch_size"] + parameters["patch_size"] = [ + parameters_patch["patch_size"][0] * 10, + parameters_patch["patch_size"][1] * 10, + ] + parameters["nested_training"]["validation"] = 1 + inference_data, parameters["headers"] = parseTrainingCSV( + resized_inference_data_list, train=False + ) + for model_type in all_model_type: + parameters["model"]["type"] = model_type + InferenceManager( + dataframe=inference_data, + modelDir=modelDir, + parameters=parameters, + device=device, + ) + all_folders_in_modelDir = 
os.listdir(modelDir) + for folder in all_folders_in_modelDir: + output_subject_dir = os.path.join(modelDir, folder) + if os.path.isdir(output_subject_dir): + # check in the default outputDir that's created - this is based on a unique timestamp + if folder != "output_validation": + # if 'predictions.csv' are not found, give error + assert os.path.exists( + os.path.join( + output_subject_dir, + str(input_df["SubjectID"][0]), + "predictions.csv", + ) + ), "predictions.csv not found" + # ensure previous results are removed + sanitize_outputDir() + + for file in files_to_delete: + os.remove(file) + + sanitize_outputDir() + + print("passed") + + +def test_train_inference_classification_histology_2d(device): + print("36: Starting histology train/inference classification tests") + # overwrite previous results + sanitize_outputDir() + output_dir_patches = os.path.join(outputDir, "histo_patches") + if os.path.isdir(output_dir_patches): + shutil.rmtree(output_dir_patches) + Path(output_dir_patches).mkdir(parents=True, exist_ok=True) + output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") + + parameters_patch = {} + # extracting minimal number of patches to ensure that the test does not take too long + parameters_patch["patch_size"] = [128, 128] + + for num_patches in [-1, 3]: + parameters_patch["num_patches"] = num_patches + file_config_temp = write_temp_config_path(parameters_patch) + + if os.path.exists(output_dir_patches_output): + shutil.rmtree(output_dir_patches_output) + # this ensures that the output directory for num_patches=3 is preserved + Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) + patch_extraction( + inputDir + "/train_2d_histo_classification.csv", + output_dir_patches_output, + file_config_temp, + ) + + file_for_Training = os.path.join(output_dir_patches_output, "opm_train.csv") + temp_df = pd.read_csv(file_for_Training) + temp_df.drop("Label", axis=1, inplace=True) + temp_df["valuetopredict"] = np.random.randint(2, size=6) + temp_df.to_csv(file_for_Training, index=False) + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "histo" + parameters["patch_size"] = 128 + file_config_temp = write_temp_config_path(parameters) + parameters = ConfigManager(file_config_temp, version_check_flag=False) + parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV(file_for_Training) + parameters["model"]["num_channels"] = 3 + parameters["model"]["architecture"] = "densenet121" + parameters["model"]["norm_type"] = "none" + parameters["data_preprocessing"]["rgba2rgb"] = "" + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + parameters["nested_training"]["testing"] = 1 + parameters["nested_training"]["validation"] = -2 + parameters["model"]["print_summary"] = False + modelDir = os.path.join(outputDir, "modelDir") + if os.path.isdir(modelDir): + shutil.rmtree(modelDir) + Path(modelDir).mkdir(parents=True, exist_ok=True) + TrainingManager( + dataframe=training_data, + outputDir=modelDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + parameters["output_dir"] = modelDir # this is in inference mode + inference_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_histo_classification.csv", train=False + ) + for model_type in all_model_type: + parameters["nested_training"]["testing"] = 1 + 
parameters["nested_training"]["validation"] = -2 + parameters["output_dir"] = modelDir # this is in inference mode + inference_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_histo_segmentation.csv", train=False + ) + parameters["model"]["type"] = model_type + InferenceManager( + dataframe=inference_data, + modelDir=modelDir, + parameters=parameters, + device=device, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_unet_layerchange_rad_2d(device): + # test case to up code coverage --> test decreasing allowed layers for unet + print("37: Starting 2D Rad segmentation tests for normtype") + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + for model in ["unet_multilayer", "lightunet_multilayer", "unetr"]: + parameters["model"]["architecture"] = model + parameters["patch_size"] = [4, 4, 1] + parameters["model"]["dimension"] = 2 + + # this assertion should fail + with pytest.raises(BaseException) as _: + global_models_dict[parameters["model"]["architecture"]]( + parameters=parameters + ) + + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["depth"] = 7 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + parameters["model"]["norm_type"] = "batch" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_unetr_rad_3d(device): + print("38: Testing UNETR for 3D segmentation") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["model"]["architecture"] = "unetr" + parameters["patch_size"] = [4, 4, 4] + parameters["model"]["dimension"] = 3 + parameters["model"]["depth"] = 2 + parameters["model"]["print_summary"] = False + + # this assertion should fail + with pytest.raises(BaseException) as _: + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + parameters["model"]["dimension"] = 3 + parameters["patch_size"] = [32, 32, 32] + + with pytest.raises(BaseException) as _: + parameters["model"]["inner_patch_size"] = 19 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + with pytest.raises(BaseException) as _: + parameters["model"]["inner_patch_size"] = 64 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + for patch in [16, 8]: + parameters["model"]["inner_patch_size"] = patch + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = len( + parameters["headers"]["channelHeaders"] + ) + parameters = 
populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + parameters["model"]["norm_type"] = "batch" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_unetr_rad_2d(device): + print("39: Testing UNETR for 2D segmentation") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["model"]["architecture"] = "unetr" + parameters["patch_size"] = [128, 128, 1] + parameters["model"]["dimension"] = 2 + + for patch in [16, 8]: + parameters["model"]["inner_patch_size"] = patch + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + parameters["model"]["norm_type"] = "batch" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_transunet_rad_2d(device): + print("40: Testing TransUNet for 2D segmentation") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + parameters["model"]["architecture"] = "transunet" + parameters["patch_size"] = [128, 128, 1] + parameters["model"]["dimension"] = 2 + parameters["model"]["print_summary"] = False + + with pytest.raises(BaseException) as _: + parameters["model"]["num_heads"] = 6 + parameters["model"]["embed_dim"] = 64 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + with pytest.raises(BaseException) as _: + parameters["model"]["num_heads"] = 3 + parameters["model"]["embed_dim"] = 50 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + parameters["model"]["embed_dim"] = 64 + parameters["model"]["depth"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["num_heads"] = 8 + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = 3 + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + parameters["model"]["norm_type"] = "batch" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + 
sanitize_outputDir() + + print("passed") + + +def test_train_segmentation_transunet_rad_3d(device): + print("41: Testing TransUNet for 3D segmentation") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["model"]["architecture"] = "transunet" + parameters["patch_size"] = [4, 4, 4] + parameters["model"]["dimension"] = 3 + parameters["model"]["print_summary"] = False + + # this assertion should fail + with pytest.raises(BaseException) as _: + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + parameters["model"]["dimension"] = 3 + parameters["patch_size"] = [32, 32, 32] + + with pytest.raises(BaseException) as _: + parameters["model"]["depth"] = 1 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + with pytest.raises(BaseException) as _: + parameters["model"]["num_heads"] = 6 + parameters["model"]["embed_dim"] = 64 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + with pytest.raises(BaseException) as _: + parameters["model"]["num_heads"] = 3 + parameters["model"]["embed_dim"] = 50 + global_models_dict[parameters["model"]["architecture"]](parameters=parameters) + + parameters["model"]["num_heads"] = 8 + parameters["model"]["embed_dim"] = 64 + parameters["model"]["depth"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + parameters["model"]["norm_type"] = "batch" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + if os.path.isdir(outputDir): + shutil.rmtree(outputDir) # overwrite previous results + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_train_gradient_clipping_classification_rad_2d(device): + print("42: Testing gradient clipping") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["modality"] = "rad" + parameters["track_memory_usage"] = True + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + # read and parse csv + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_classification.csv" + ) + parameters["model"]["num_channels"] = 3 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # ensure gradient clipping is getting tested + for clip_mode in ["norm", "value", "agc"]: + parameters["model"]["architecture"] = "imagenet_vgg11" + parameters["model"]["final_layer"] = "softmax" + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + parameters["clip_mode"] = clip_mode + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + sanitize_outputDir() + + print("passed") + + 
+def test_train_segmentation_unet_conversion_rad_3d(device): + print("43: Starting 3D Rad segmentation tests for unet with ACS conversion") + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["class_list"] = [0, 1] + parameters["model"]["final_layer"] = "softmax" + parameters["model"]["amp"] = True + parameters["in_memory"] = True + parameters["verbose"] = False + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in ["unet", "unet_multilayer", "lightunet_multilayer"]: + for converter_type in ["acs", "soft", "conv3d"]: + parameters["model"]["converter_type"] = converter_type + parameters["model"]["architecture"] = model + parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_generic_cli_function_configgenerator(): + print("44: Starting testing cli function for config generator") + base_config_path = os.path.join(baseConfigDir, "config_all_options.yaml") + generator_config_path = os.path.join( + baseConfigDir, "config_generator_sample_strategy.yaml" + ) + sanitize_outputDir() + config_generator(base_config_path, generator_config_path, outputDir) + all_files = os.listdir(outputDir) + assert len(all_files) == 72, "config generator did not generate all files" + + for file in all_files: + parameters = None + with suppress_stdout_stderr(): + parameters = ConfigManager( + os.path.join(outputDir, file), version_check_flag=False + ) + assert parameters, "config generator did not generate valid config files" + sanitize_outputDir() + + generator_config = yaml.safe_load(open(generator_config_path, "r")) + generator_config["second_level_dict_that_should_fail"] = { + "key_1": {"key_2": "value"} + } + + file_config_temp = write_temp_config_path(generator_config) + + # test for failure + with pytest.raises(Exception) as exc_info: + config_generator(base_config_path, file_config_temp, outputDir) + sanitize_outputDir() + + print("Exception raised:", exc_info.value) + + sanitize_outputDir() + + print("passed") + + +def test_generic_cli_function_recoverconfig(): + print("45: Testing cli function for recover_config") + # Train, then recover a config and see if it exists/is valid YAML + + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + # patch_size is custom for sdnet + parameters["patch_size"] = [224, 224, 1] + parameters["batch_size"] = 2 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["num_channels"] = 1 + parameters["model"]["architecture"] = "sdnet" 
+ parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + output_config_path = write_temp_config_path(None) + assert recover_config( + outputDir, output_config_path + ), "recover_config returned false" + assert os.path.exists(output_config_path), "Didn't create a config file" + + new_params = ConfigManager(output_config_path, version_check_flag=False) + assert new_params, "Created YAML could not be parsed by ConfigManager" + + sanitize_outputDir() + + print("passed") + + +def test_generic_deploy_docker(): + print("46: Testing deployment of a model to Docker") + # Train, then try deploying that model (requires an installed Docker engine) + + deploymentOutputDir = os.path.join(outputDir, "mlcube") + # read and parse csv + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_2d_rad_segmentation.csv" + ) + + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["num_channels"] = 3 + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters["data_preprocessing"]["resize_image"] = [224, 224] + parameters["memory_save_mode"] = True + + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + custom_entrypoint = os.path.join( + gandlfRootDir, + "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", + ) + for entrypoint_script in [None, custom_entrypoint]: + result = run_deployment( + os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), + deploymentOutputDir, + "docker", + "model", + entrypoint_script=entrypoint_script, + configfile=testingDir + "/config_segmentation.yaml", + modeldir=outputDir, + requires_gpu=True, + ) + msg = "run_deployment returned false" + if entrypoint_script: + msg += " with custom entrypoint script" + assert result, msg + + sanitize_outputDir() + + print("passed") + + +def test_collision_subjectid_test_segmentation_rad_2d(device): + print("47: Starting 2D Rad segmentation tests for collision of subjectID in test") + parameters = ConfigManager( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["num_epochs"] = 1 + parameters["nested_training"]["testing"] = 1 + parameters["model"]["dimension"] = 2 + parameters["model"]["class_list"] = [0, 255] + parameters["model"]["amp"] = True + parameters["model"]["print_summary"] = False + parameters["model"]["num_channels"] = 3 + parameters["metrics"] = ["dice"] + parameters["model"]["architecture"] = "unet" + outputDir = os.path.join(testingDir, "data_output") + + file_config_temp = write_temp_config_path(parameters) + + # test the case where outputDir is explicitly provided to InferenceManager + train_data_path = inputDir + "/train_2d_rad_segmentation.csv" + test_data_path = inputDir + 
"/test_2d_rad_segmentation.csv" + df = pd.read_csv(train_data_path) + temp_df = pd.read_csv(train_data_path) + # Concatenate the two dataframes + df = pd.concat([df, temp_df], ignore_index=True) + + df.to_csv(test_data_path, index=False) + _, testing_data, _ = parseTestingCSV(test_data_path, outputDir) + # Save testing data to a csv file + testing_data.to_csv(test_data_path, index=False) + + main_run( + train_data_path + "," + train_data_path + "," + test_data_path, + file_config_temp, + outputDir, + False, + device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") + + +def test_generic_random_numbers_are_deterministic_on_cpu(): + print("48: Starting testing deterministic random numbers generation") + + set_determinism(seed=42) + a, b = np.random.rand(3, 3), np.random.rand(3, 3) + + set_determinism(seed=42) + c, d = np.random.rand(3, 3), np.random.rand(3, 3) + + # Check that the generated random numbers are the same with numpy + assert np.allclose(a, c) + assert np.allclose(b, d) + + e, f = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + + set_determinism(seed=42) + g, h = [random.random() for _ in range(5)], [random.random() for _ in range(5)] + + # Check that the generated random numbers are the same with Python's built-in random module + assert e == g + assert f == h + + print("passed") + + +def test_generic_cli_function_metrics_cli_rad_nd(): + print("49: Starting metric calculation tests") + for dim in ["2d", "3d"]: + for problem_type in ["segmentation", "classification", "synthesis"]: + synthesis_detected = problem_type == "synthesis" + problem_type_wrap = problem_type + if synthesis_detected: + problem_type_wrap = "classification" + # read and parse csv + training_data, _ = parseTrainingCSV( + inputDir + f"/train_{dim}_rad_{problem_type_wrap}.csv" + ) + if problem_type_wrap == "segmentation": + labels_array = training_data["Label"] + elif synthesis_detected: + labels_array = training_data["Channel_0"] + else: + labels_array = training_data["ValueToPredict"] + training_data["target"] = labels_array + training_data["prediction"] = labels_array + if synthesis_detected: + # this optional + training_data["mask"] = training_data["Label"] + + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + f"/config_{problem_type_wrap}.yaml", + version_check_flag=False, + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["2D"] + parameters["model"]["dimension"] = 2 + if dim == "3d": + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + + parameters["verbose"] = False + if synthesis_detected: + parameters["problem_type"] = problem_type + + temp_infer_csv = os.path.join(outputDir, "temp_csv.csv") + training_data.to_csv(temp_infer_csv, index=False) + + output_file = os.path.join(outputDir, "output.yaml") + + temp_config = write_temp_config_path(parameters) + + # run the metrics calculation + generate_metrics_dict(temp_infer_csv, temp_config, output_file) + + assert os.path.isfile(output_file), "Metrics output file was not generated" + + sanitize_outputDir() + + +def test_generic_deploy_metrics_docker(): + print("50: Testing deployment of a metrics generator to Docker") + # requires an installed Docker engine + + deploymentOutputDir = os.path.join(outputDir, "mlcube") + + result = run_deployment( + os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), + deploymentOutputDir, + "docker", + "metrics", + ) + + assert result, "run_deployment 
returned false" + sanitize_outputDir() + + print("passed") + + +def test_generic_data_split(): + print("51: Starting test for splitting and saving CSVs") + # read and initialize parameters for specific data dimension + parameters = ConfigManager( + testingDir + "/config_classification.yaml", version_check_flag=False + ) + parameters["nested_training"] = {"testing": 5, "validation": 5, "stratified": True} + # read and parse csv + training_data, _ = parseTrainingCSV(inputDir + "/train_3d_rad_classification.csv") + # duplicate the data to test stratified sampling + training_data_duplicate = training_data._append(training_data) + for _ in range(1): + training_data_duplicate = training_data_duplicate._append( + training_data_duplicate + ) + training_data_duplicate.reset_index(drop=True, inplace=True) + # ensure subjects are not duplicated + training_data_duplicate["SubjectID"] = training_data_duplicate.index + + sanitize_outputDir() + + split_data_and_save_csvs(training_data_duplicate, outputDir, parameters) + + files_in_outputDir = os.listdir(outputDir) + assert len(files_in_outputDir) == 15, "CSVs were not split correctly" + + sanitize_outputDir() + + print("passed") def test_gandlf_logging(): + print("52: Starting test for logging") gandlf_logger_setup() message = "Testing logging" From abddbd747bfd4160cdcef0bb667d15b8cbdfc12f Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 13:12:16 +0300 Subject: [PATCH 13/34] black test_full --- testing/test_full.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index c28db39fb..d10b7c151 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3162,21 +3162,17 @@ def test_generic_data_split(): print("passed") + def test_gandlf_logging(): print("52: Starting test for logging") - + gandlf_logger_setup() message = "Testing logging" logging.info(message) - with open('tmp/gandlf/gandlf.log', 'r') as log_file: + with open("tmp/gandlf/gandlf.log", "r") as log_file: logs = log_file.read() assert message in logs - - print("passed") - - - - + print("passed") From c25396c29d56076983c8b392c7699a476c4bf2ef Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 14:05:38 +0300 Subject: [PATCH 14/34] add logging test in test_full --- testing/test_full.py | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index d10b7c151..5e68df45c 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -241,8 +241,6 @@ def write_temp_config_path(parameters_to_write): # these are helper functions to be used in other tests - - def test_train_segmentation_rad_2d(device): print("03: Starting 2D Rad segmentation tests") # read and parse csv @@ -3163,16 +3161,47 @@ def test_generic_data_split(): print("passed") -def test_gandlf_logging(): +def test_gandlf_logging(capsys): print("52: Starting test for logging") gandlf_logger_setup() message = "Testing logging" - logging.info(message) + logging.debug(message) + # tests if the message is in the file.log with open("tmp/gandlf/gandlf.log", "r") as log_file: logs = log_file.read() assert message in logs + # test the stout info level. 
The stout must show only INFO messages + message = "Testing stout logging" + logging.info(message) + capture = capsys.readouterr() + assert message in capture.out + + # Test the stout not showing other messages + message = "Testing stout logging" + logging.debug(message) + logging.warning(message) + logging.error(message) + logging.critical(message) + capture = capsys.readouterr() + assert message not in capture.out + + # test sterr must NOT show these messages. + message = "Testing sterr logging" + logging.info(message) + logging.debug(message) + capture = capsys.readouterr() + assert message not in capture.err + + # test sterr must show these messages. + logging.error(message) + logging.warning(message) + logging.critical(message) + capture = capsys.readouterr() + assert message in capture.err + + sanitize_outputDir() print("passed") From bd9ba0d86ddbde115dd8a8044b423525c8fc364f Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 14:28:04 +0300 Subject: [PATCH 15/34] remove unnecessary imports --- GANDLF/compute/forward_pass.py | 2 +- GANDLF/utils/gandlf_logger.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 7eee2c190..539c97538 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -22,7 +22,7 @@ ) from GANDLF.metrics import overall_stats from tqdm import tqdm -from GANDLF.utils import gandlf_logger_setup + def validate_network( diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 17560353c..2d8412ca4 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -1,5 +1,4 @@ import logging -from logging import config import yaml from pathlib import Path from importlib import resources From 57f02c95fca999fd0f949be32530bc94a14b5a43 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Wed, 3 Jul 2024 14:29:39 +0300 Subject: [PATCH 16/34] black forward_pass --- GANDLF/compute/forward_pass.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 539c97538..b2a70cc24 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -24,7 +24,6 @@ from tqdm import tqdm - def validate_network( model: torch.nn.Module, valid_dataloader: DataLoader, From f1a3dfcedaf9e5b25954e766544ef72dd1b8d1d3 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Wed, 3 Jul 2024 18:32:37 +0300 Subject: [PATCH 17/34] change the logging test name Co-authored-by: Sarthak Pati --- testing/test_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index 5e68df45c..11ba1ee0c 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3161,7 +3161,7 @@ def test_generic_data_split(): print("passed") -def test_gandlf_logging(capsys): +def test_generic_logging(capsys): print("52: Starting test for logging") gandlf_logger_setup() From 7d6f25e76d0bc19d478318599554ba7d6ca48ffe Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Thu, 4 Jul 2024 11:03:13 +0300 Subject: [PATCH 18/34] Add logging documentation (#10) --- docs/extending.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/extending.md b/docs/extending.md index 229ce333c..df36f2440 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -132,4 +132,40 @@ bash # continue from previous 
shell (venv_gandlf) $> coverage run -m pytest --device cuda; coverage report -m ``` +## Logging + +### Use loggers instead of print +We use the native `logging` [library](https://docs.python.org/3/library/logging.html) for logs management. +It is already configured. So, If you are extending the code, please use loggers instead of prints. + +Here is an example how logger can be used: + +``` +def my_new_cool_function(df: pd.DataFrame): + logger = logging.getLogger(__name__) # you can use any your own logger name or just pass a current file name + logger.debug("Message for debug file only") + logger.info("Hi GaNDLF user, I greet you in the CLI output") + logger.error(f"A detailed message about any error if needed. Exception: {str(e)}, params: {params}, df shape: {df.shape}") + # print("Hi GaNDLF user!") # don't use prints please. +``` + +### What and where is logged + +GaNDLF logs are splitted into multiple parts: +- CLI output: only `info` messages are shown here +- debug file: all messages are shown +- stderr: display `warning,error,critical` messages + +All the logs are saved in the `tmp/gandlf` directory + +Example of log message +``` +#format: "%(asctime)s - %(name)s - %(levelname)s - %(pathname)s:%(lineno)d - %(message)s" +2024-07-03 13:05:51,642 - root - DEBUG - GaNDLF/GANDLF/entrypoints/anonymizer.py:28 - input_dir='.' +``` + +### Create your own logger +You can create and configure your own logger in the `GANDLF\logging_config.yaml`. + + From a2b834dce9b7c6da3e43af0892b4030f3358fa0f Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Thu, 4 Jul 2024 14:08:48 +0300 Subject: [PATCH 19/34] change the log format --- GANDLF/logging_config.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index ec305f19d..89b6100b6 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -1,9 +1,11 @@ version: 1 formatters: detailed: + format: "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" + simple: format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - detailed1: - format: "%(asctime)s - %(name)s - %(levelname)s - %(pathname)s:%(lineno)d - %(message)s" + datefmt: "%Y-%m-%d %H:%M:%S" filters: warnings_filter: (): logging.Filter @@ -14,18 +16,18 @@ handlers: stdoutHandler: # only display info level class: logging.StreamHandler level: INFO - formatter: detailed1 + formatter: simple filters: [info_only_filter] stream: ext://sys.stdout stderrHandler: # display warning and above messages class: logging.StreamHandler level: WARNING - formatter: detailed1 + formatter: detailed stream: ext://sys.stderr rotatingFileHandler: class: logging.handlers.RotatingFileHandler level: DEBUG - formatter: detailed1 + formatter: detailed filename: tmp/gandlf/gandlf.log maxBytes: 10485760 backupCount: 2 From fd434b2bcc4e3d45e741858dc6b1b928b82243c8 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:01:43 +0300 Subject: [PATCH 20/34] Update docs/extending.md Co-authored-by: Sarthak Pati --- docs/extending.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/extending.md b/docs/extending.md index df36f2440..cad5fa62f 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -135,8 +135,7 @@ bash ## Logging ### Use loggers instead of print -We use the native `logging` 
[library](https://docs.python.org/3/library/logging.html) for logs management. -It is already configured. So, If you are extending the code, please use loggers instead of prints. +We use the native `logging` [library](https://docs.python.org/3/library/logging.html) for logs management. This gets automatically configured when GaNDLF gets launched. So, if you are extending the code, please use loggers instead of prints. Here is an example how logger can be used: From 783cbad5f1e3dbe819c81631154544391b2030d1 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:02:00 +0300 Subject: [PATCH 21/34] Update docs/extending.md Co-authored-by: Sarthak Pati --- docs/extending.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/extending.md b/docs/extending.md index cad5fa62f..4f7e3dde9 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -153,7 +153,7 @@ def my_new_cool_function(df: pd.DataFrame): GaNDLF logs are splitted into multiple parts: - CLI output: only `info` messages are shown here - debug file: all messages are shown -- stderr: display `warning,error,critical` messages +- stderr: display `warning`, `error`, or `critical` messages All the logs are saved in the `tmp/gandlf` directory From 8736c11b8c48722cf5f6a79e29657558b12a20c6 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:03:25 +0300 Subject: [PATCH 22/34] Update docs/extending.md Co-authored-by: Sarthak Pati --- docs/extending.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/extending.md b/docs/extending.md index 4f7e3dde9..9b7806b27 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -164,7 +164,7 @@ Example of log message ``` ### Create your own logger -You can create and configure your own logger in the `GANDLF\logging_config.yaml`. +You can create and configure your own logger by updating the file `GANDLF/logging_config.yaml`. 
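[Editorial note — not part of the patch series above. As a minimal sketch of the usage pattern the documentation in PATCH 18–22 recommends: a module-level logger obtained via `logging.getLogger(__name__)`, relying on the handlers defined in `GANDLF/logging_config.yaml` (INFO-only stdout, WARNING-and-above stderr, everything to the rotating debug file). The function name and the pandas DataFrame are hypothetical illustrations, and the sketch assumes the GaNDLF logging setup from this series has already configured the root logger.]

```python
import logging

import pandas as pd

# Module-level logger; its name fills the "%(name)s" field in the file/console formats.
logger = logging.getLogger(__name__)


def summarize_dataframe(df: pd.DataFrame) -> None:
    # DEBUG ends up only in the rotating log file (the stdout handler is INFO-only).
    logger.debug("Columns: %s", list(df.columns))
    # INFO is what the user sees on the console.
    logger.info("Read %d rows", len(df))
    if df.empty:
        # WARNING and above are also routed to stderr, per logging_config.yaml.
        logger.warning("Received an empty dataframe")
```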
From 935d4a36909006f7f43debd08d1ec44d5dd04771 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Mon, 15 Jul 2024 13:23:08 +0300 Subject: [PATCH 23/34] update gandlf_logger - Update gandlf_logging_setup - Update logging_cofing - Update setup.py --- GANDLF/logging_config.yaml | 10 ++++++-- GANDLF/utils/gandlf_logger.py | 48 ++++++++++++++++++++++++----------- setup.py | 3 ++- testing/test_full.py | 6 +++-- 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 89b6100b6..6bc1fbdac 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -3,8 +3,15 @@ formatters: detailed: format: "%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s" datefmt: "%Y-%m-%d %H:%M:%S" + log_colors: + DEBUG: "white" + INFO: "green" + WARNING: "yellow" + ERROR: "red" + CRITICAL: "bold_red" simple: - format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + (): colorlog.ColoredFormatter + format: "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s" datefmt: "%Y-%m-%d %H:%M:%S" filters: warnings_filter: @@ -28,7 +35,6 @@ handlers: class: logging.handlers.RotatingFileHandler level: DEBUG formatter: detailed - filename: tmp/gandlf/gandlf.log maxBytes: 10485760 backupCount: 2 loggers: # you can add your customized logger diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 2d8412ca4..7f6c309ee 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -2,31 +2,49 @@ import yaml from pathlib import Path from importlib import resources +import os +import colorlog -def gandlf_logger_setup(config_path="logging_config.yaml") -> logging.Logger: +def gandlf_logger_setup(log_dir=None, config_path="logging_config.yaml"): """ - It sets up the logger. Read from logging_config. + It sets up the logger. Reads from logging_config. + If log_dir is None, the logs are flashed to console. 
Args: - logger_name (str): logger name, the name should be the same in the logging_config + log_dir (str): dir path for saving the logs config_path (str): file path for the configuration - Returns: - logging.Logger - """ - # create dir for storing the messages - current_dir = Path.cwd() - directory = Path.joinpath(current_dir, "tmp/gandlf") - directory.mkdir(parents=True, exist_ok=True) + """ - with resources.open_text("GANDLF", config_path) as file: - config_dict = yaml.safe_load(file) - logging.config.dictConfig(config_dict) + if log_dir == None: # flash logs + formatter = colorlog.ColoredFormatter( + "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + log_colors={ + "DEBUG": "blue", + "INFO": "green", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "bold_red", + }, + ) + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logging.root.setLevel(logging.DEBUG) + logging.root.addHandler(console_handler) + + else: # create the log file + output_dir = Path(log_dir) + Path(output_dir).mkdir(parents=True, exist_ok=True) + with resources.open_text("GANDLF", config_path) as file: + config_dict = yaml.safe_load(file) + config_dict["handlers"]["rotatingFileHandler"]["filename"] = str( + Path.joinpath(output_dir, "gandlf.log") + ) + logging.config.dictConfig(config_dict) logging.captureWarnings(True) - # return logging.getLogger(logger_name) - class InfoOnlyFilter(logging.Filter): """ diff --git a/setup.py b/setup.py index a5f9d5c09..1ee6a3f72 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ ] # Any extra files should be located at `GANDLF` module folder (not in repo root) -extra_files = ["logging_config.yml"] +extra_files = ["logging_config.yaml"] toplevel_package_excludes = ["testing*"] # specifying version for `black` separately because it is also used to [check for lint](https://github.com/mlcommons/GaNDLF/blob/master/.github/workflows/black.yml) @@ -80,6 +80,7 @@ "deprecated", "packaging==24.0", "typer==0.9.0", + "colorlog", ] if __name__ == "__main__": diff --git a/testing/test_full.py b/testing/test_full.py index ce0c78293..df080e32b 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3190,16 +3190,18 @@ def test_generic_data_split(): def test_generic_logging(capsys): print("52: Starting test for logging") - gandlf_logger_setup() + gandlf_logger_setup("testing/log") message = "Testing logging" logging.debug(message) # tests if the message is in the file.log - with open("tmp/gandlf/gandlf.log", "r") as log_file: + with open("testing/log/gandlf.log", "r") as log_file: logs = log_file.read() assert message in logs + shutil.rmtree("testing/log") + # test the stout info level. 
The stout must show only INFO messages message = "Testing stout logging" logging.info(message) From af4647234199fa878f1be4c9b70c615c975b0f7a Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Mon, 15 Jul 2024 13:43:34 +0300 Subject: [PATCH 24/34] remove unnecessary imports --- GANDLF/utils/gandlf_logger.py | 1 - 1 file changed, 1 deletion(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 7f6c309ee..37e117ec0 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -2,7 +2,6 @@ import yaml from pathlib import Path from importlib import resources -import os import colorlog From 36da4d2a509ee2d43b1ade6da399b1978f045312 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:44:11 +0300 Subject: [PATCH 25/34] update gandlf_logger_setup --- GANDLF/logging_config.yaml | 2 +- GANDLF/utils/gandlf_logger.py | 72 +++++++++++++++++++++-------------- docs/extending.md | 6 ++- testing/test_full.py | 10 ++--- 4 files changed, 54 insertions(+), 36 deletions(-) diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 6bc1fbdac..382a29eb6 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -11,7 +11,7 @@ formatters: CRITICAL: "bold_red" simple: (): colorlog.ColoredFormatter - format: "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(message)s" + format: "%(log_color)s%(asctime)s - %(levelname)s - %(message)s" datefmt: "%Y-%m-%d %H:%M:%S" filters: warnings_filter: diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 37e117ec0..51a7bdbe0 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -5,44 +5,58 @@ import colorlog -def gandlf_logger_setup(log_dir=None, config_path="logging_config.yaml"): +def _flash_to_console(): + formatter = colorlog.ColoredFormatter( + "%(log_color)s%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + log_colors={ + "DEBUG": "blue", + "INFO": "green", + "WARNING": "yellow", + "ERROR": "red", + "CRITICAL": "bold_red", + }, + ) + console_handler = logging.StreamHandler() + console_handler.setFormatter(formatter) + logging.root.setLevel(logging.DEBUG) + logging.root.addHandler(console_handler) + + +def _create_log_file(log_file): + log_file = Path(log_file) + log_file.write_text("Starting GaNDLF logging session \n") + + +def _save_logs_in_file(log_file, config_path): + _create_log_file(log_file) + with resources.open_text("GANDLF", config_path) as file: + config_dict = yaml.safe_load(file) + config_dict["handlers"]["rotatingFileHandler"]["filename"] = str(log_file) + logging.config.dictConfig(config_dict) + + +def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml"): """ It sets up the logger. Reads from logging_config. - If log_dir is None, the logs are flashed to console. + Args: - log_dir (str): dir path for saving the logs + log_file (str): dir path for saving the logs, defaults to `None`, at which time logs are flushed to console. 
config_path (str): file path for the configuration """ - if log_dir == None: # flash logs - formatter = colorlog.ColoredFormatter( - "%(log_color)s%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(funcName)s:%(lineno)d - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - log_colors={ - "DEBUG": "blue", - "INFO": "green", - "WARNING": "yellow", - "ERROR": "red", - "CRITICAL": "bold_red", - }, - ) - console_handler = logging.StreamHandler() - console_handler.setFormatter(formatter) - logging.root.setLevel(logging.DEBUG) - logging.root.addHandler(console_handler) + logging.captureWarnings(True) - else: # create the log file - output_dir = Path(log_dir) - Path(output_dir).mkdir(parents=True, exist_ok=True) - with resources.open_text("GANDLF", config_path) as file: - config_dict = yaml.safe_load(file) - config_dict["handlers"]["rotatingFileHandler"]["filename"] = str( - Path.joinpath(output_dir, "gandlf.log") - ) - logging.config.dictConfig(config_dict) + if log_file is None: # flash logs + _flash_to_console() - logging.captureWarnings(True) + else: # create the log file + try: + _save_logs_in_file(log_file, config_path) + except Exception as e: + logging.error(f"log_file:{e}") + logging.warning("The logs will be flushed to console") class InfoOnlyFilter(logging.Filter): diff --git a/docs/extending.md b/docs/extending.md index 9b7806b27..69ba06268 100644 --- a/docs/extending.md +++ b/docs/extending.md @@ -155,7 +155,11 @@ GaNDLF logs are splitted into multiple parts: - debug file: all messages are shown - stderr: display `warning`, `error`, or `critical` messages -All the logs are saved in the `tmp/gandlf` directory +By default, the logs are flushed to console. +The logs are **saved** in the path that is defined by the '--log-file' parameter in the CLI commands. +If the path is not provided or an error is raised, the logs will be flushed to console. + + Example of log message ``` diff --git a/testing/test_full.py b/testing/test_full.py index df080e32b..86a3c7049 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3189,18 +3189,18 @@ def test_generic_data_split(): def test_generic_logging(capsys): print("52: Starting test for logging") - - gandlf_logger_setup("testing/log") + log_file = "testing/gandlf.log" + gandlf_logger_setup(log_file) message = "Testing logging" logging.debug(message) # tests if the message is in the file.log - with open("testing/log/gandlf.log", "r") as log_file: - logs = log_file.read() + with open(log_file, "r") as file: + logs = file.read() assert message in logs - shutil.rmtree("testing/log") + os.remove(log_file) # test the stout info level. 
The stout must show only INFO messages message = "Testing stout logging" From d357fff086c1943f5cf8b0112f513f293fe70f6b Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:58:54 +0300 Subject: [PATCH 26/34] Update GANDLF/utils/gandlf_logger.py Co-authored-by: Sarthak Pati --- GANDLF/utils/gandlf_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 51a7bdbe0..8b8d63600 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -5,7 +5,7 @@ import colorlog -def _flash_to_console(): +def _flush_to_console(): formatter = colorlog.ColoredFormatter( "%(log_color)s%(asctime)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", From 3051b4b84064ecb18c63c7b962a4c9fb22bed175 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:59:08 +0300 Subject: [PATCH 27/34] Update GANDLF/utils/gandlf_logger.py Co-authored-by: Sarthak Pati --- GANDLF/utils/gandlf_logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 8b8d63600..c2efefe83 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -48,8 +48,8 @@ def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml"): logging.captureWarnings(True) - if log_file is None: # flash logs - _flash_to_console() + if log_file is None: # flush logs + _flush_to_console() else: # create the log file try: From ca076df41dea6d69baba12363b81c33d2a572ec3 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Fri, 19 Jul 2024 17:48:43 +0300 Subject: [PATCH 28/34] change the default to create a tmp file --- GANDLF/utils/gandlf_logger.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index c2efefe83..90e1e9a6c 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -3,6 +3,8 @@ from pathlib import Path from importlib import resources import colorlog +import tempfile +from GANDLF.utils import get_unique_timestamp def _flush_to_console(): @@ -23,13 +25,21 @@ def _flush_to_console(): logging.root.addHandler(console_handler) +def _create_tmp_log_file(): + tmp_dir = Path(tempfile.gettempdir()) + log_dir = Path.joinpath(tmp_dir, ".gandlf") + log_dir.mkdir(parents=True, exist_ok=True) + log_file = Path.joinpath(log_dir, get_unique_timestamp() + ".log") + _create_log_file(log_file) + return log_file + + def _create_log_file(log_file): log_file = Path(log_file) log_file.write_text("Starting GaNDLF logging session \n") def _save_logs_in_file(log_file, config_path): - _create_log_file(log_file) with resources.open_text("GANDLF", config_path) as file: config_dict = yaml.safe_load(file) config_dict["handlers"]["rotatingFileHandler"]["filename"] = str(log_file) @@ -47,16 +57,18 @@ def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml"): """ logging.captureWarnings(True) - - if log_file is None: # flush logs - _flush_to_console() - - else: # create the log file - try: + try: + if log_file is None: # create tmp file + log_tmp_file = _create_tmp_log_file() + _save_logs_in_file(log_tmp_file, config_path) + logging.info(f"The logs are saved in {log_tmp_file}") + else: # create the log file + 
_create_log_file(log_file) _save_logs_in_file(log_file, config_path) - except Exception as e: - logging.error(f"log_file:{e}") - logging.warning("The logs will be flushed to console") + except Exception as e: + _flush_to_console() + logging.error(f"log_file:{e}") + logging.warning("The logs will be flushed to console") class InfoOnlyFilter(logging.Filter): From e129be1cb8682e8ba01b735bd3066bb345eb9806 Mon Sep 17 00:00:00 2001 From: "V.Malefioudakis" Date: Fri, 19 Jul 2024 18:02:16 +0300 Subject: [PATCH 29/34] fix the error --- GANDLF/compute/forward_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/compute/forward_pass.py b/GANDLF/compute/forward_pass.py index 79d16c3a3..be2bff034 100644 --- a/GANDLF/compute/forward_pass.py +++ b/GANDLF/compute/forward_pass.py @@ -113,7 +113,7 @@ def validate_network( tqdm(valid_dataloader, desc="Looping over " + mode + " data") ): if params["verbose"]: - logging.debug(f'== Current subject: {subject["subject_id"]}') + print("== Current subject:", subject["subject_id"], flush=True) # ensure spacing is always present in params and is always subject-specific params["subject_spacing"] = None From 05fa7339f1258431dee307516a6ad1494efe844f Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Fri, 19 Jul 2024 18:19:45 +0300 Subject: [PATCH 30/34] Update GANDLF/utils/gandlf_logger.py Co-authored-by: Sarthak Pati --- GANDLF/utils/gandlf_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 90e1e9a6c..987bac077 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -46,7 +46,7 @@ def _save_logs_in_file(log_file, config_path): logging.config.dictConfig(config_dict) -def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml"): +def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml") -> None: """ It sets up the logger. Reads from logging_config. 
From e5bfd2328418169ae33465837bdd0a7fcb70d550 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Fri, 19 Jul 2024 18:46:42 +0300 Subject: [PATCH 31/34] made proposed changes --- GANDLF/utils/gandlf_logger.py | 38 ++++++----------------------------- setup.py | 1 - 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logger.py index 987bac077..b7370b365 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logger.py @@ -2,35 +2,15 @@ import yaml from pathlib import Path from importlib import resources -import colorlog import tempfile from GANDLF.utils import get_unique_timestamp -def _flush_to_console(): - formatter = colorlog.ColoredFormatter( - "%(log_color)s%(asctime)s - %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S", - log_colors={ - "DEBUG": "blue", - "INFO": "green", - "WARNING": "yellow", - "ERROR": "red", - "CRITICAL": "bold_red", - }, - ) - console_handler = logging.StreamHandler() - console_handler.setFormatter(formatter) - logging.root.setLevel(logging.DEBUG) - logging.root.addHandler(console_handler) - - def _create_tmp_log_file(): tmp_dir = Path(tempfile.gettempdir()) log_dir = Path.joinpath(tmp_dir, ".gandlf") log_dir.mkdir(parents=True, exist_ok=True) log_file = Path.joinpath(log_dir, get_unique_timestamp() + ".log") - _create_log_file(log_file) return log_file @@ -57,18 +37,12 @@ def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml") -> Non """ logging.captureWarnings(True) - try: - if log_file is None: # create tmp file - log_tmp_file = _create_tmp_log_file() - _save_logs_in_file(log_tmp_file, config_path) - logging.info(f"The logs are saved in {log_tmp_file}") - else: # create the log file - _create_log_file(log_file) - _save_logs_in_file(log_file, config_path) - except Exception as e: - _flush_to_console() - logging.error(f"log_file:{e}") - logging.warning("The logs will be flushed to console") + log_tmp_file = log_file + if log_file is None: # create tmp file + log_tmp_file = _create_tmp_log_file() + logging.info(f"The logs are saved in {log_tmp_file}") + _create_log_file(log_tmp_file) + _save_logs_in_file(log_tmp_file, config_path) class InfoOnlyFilter(logging.Filter): diff --git a/setup.py b/setup.py index 1ee6a3f72..6487b636a 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,6 @@ "deprecated", "packaging==24.0", "typer==0.9.0", - "colorlog", ] if __name__ == "__main__": From f857e65594176a79c255ad96903eb55185658908 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Fri, 19 Jul 2024 19:07:18 +0300 Subject: [PATCH 32/34] Update setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 6487b636a..1ee6a3f72 100644 --- a/setup.py +++ b/setup.py @@ -80,6 +80,7 @@ "deprecated", "packaging==24.0", "typer==0.9.0", + "colorlog", ] if __name__ == "__main__": From f4252892e520458dad5726107a6582ac37639c7b Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Sat, 20 Jul 2024 18:12:44 +0300 Subject: [PATCH 33/34] change the def name to logger_setup --- GANDLF/entrypoints/anonymizer.py | 4 ++-- GANDLF/entrypoints/cli_tool.py | 4 ++-- GANDLF/entrypoints/collect_stats.py | 4 ++-- GANDLF/entrypoints/config_generator.py | 4 ++-- GANDLF/entrypoints/construct_csv.py | 4 ++-- GANDLF/entrypoints/debug_info.py | 4 ++-- GANDLF/entrypoints/deploy.py | 4 ++-- 
GANDLF/entrypoints/generate_metrics.py | 4 ++-- GANDLF/entrypoints/optimize_model.py | 4 ++-- GANDLF/entrypoints/patch_miner.py | 4 ++-- GANDLF/entrypoints/preprocess.py | 4 ++-- GANDLF/entrypoints/recover_config.py | 4 ++-- GANDLF/entrypoints/run.py | 4 ++-- GANDLF/entrypoints/split_csv.py | 4 ++-- GANDLF/entrypoints/verify_install.py | 4 ++-- GANDLF/logging_config.yaml | 2 +- GANDLF/utils/__init__.py | 2 +- GANDLF/utils/{gandlf_logger.py => gandlf_logging.py} | 2 +- testing/test_full.py | 2 +- 19 files changed, 34 insertions(+), 34 deletions(-) rename GANDLF/utils/{gandlf_logger.py => gandlf_logging.py} (95%) diff --git a/GANDLF/entrypoints/anonymizer.py b/GANDLF/entrypoints/anonymizer.py index e990f9d14..8adc7fcc8 100644 --- a/GANDLF/entrypoints/anonymizer.py +++ b/GANDLF/entrypoints/anonymizer.py @@ -12,7 +12,7 @@ from GANDLF.anonymize import run_anonymizer from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils.gandlf_logging import logger_setup def _anonymize_images( @@ -78,7 +78,7 @@ def new_way(input_dir, config, modality, output_file): + "`gandlf_anonymizer` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Anonymize", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py index 7a61e58eb..57fdc0ed8 100644 --- a/GANDLF/entrypoints/cli_tool.py +++ b/GANDLF/entrypoints/cli_tool.py @@ -3,7 +3,7 @@ import click from .subcommands import cli_subcommands from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils import gandlf_logger_setup +from GANDLF.utils import logger_setup from GANDLF import version @@ -25,7 +25,7 @@ def gandlf(ctx, loglevel): ctx.ensure_object(dict) ctx.obj["LOGLEVEL"] = loglevel # setup_logging(loglevel) - gandlf_logger_setup() + logger_setup() # registers subcommands: `gandlf anonymizer`, `gandlf run`, etc. diff --git a/GANDLF/entrypoints/collect_stats.py b/GANDLF/entrypoints/collect_stats.py index 81da499b9..a418686a1 100644 --- a/GANDLF/entrypoints/collect_stats.py +++ b/GANDLF/entrypoints/collect_stats.py @@ -14,7 +14,7 @@ from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils import gandlf_logger_setup +from GANDLF.utils import logger_setup def plot_all(df_training, df_validation, df_testing, output_plot_dir): @@ -206,7 +206,7 @@ def new_way(model_dir: str, output_dir: str): + "`gandlf_collectStats` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_CollectStats", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/config_generator.py b/GANDLF/entrypoints/config_generator.py index 7be011790..861d80077 100644 --- a/GANDLF/entrypoints/config_generator.py +++ b/GANDLF/entrypoints/config_generator.py @@ -4,7 +4,7 @@ from GANDLF.cli import config_generator, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _generate_config(config: str, strategy: str, output: str): @@ -47,7 +47,7 @@ def new_way(config, strategy, output): + "`gandlf_configGenerator` script would be deprecated soon." 
) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_ConfigGenerator", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/construct_csv.py b/GANDLF/entrypoints/construct_csv.py index 342825180..b8e7c2994 100644 --- a/GANDLF/entrypoints/construct_csv.py +++ b/GANDLF/entrypoints/construct_csv.py @@ -14,7 +14,7 @@ from GANDLF.utils import writeTrainingCSV from GANDLF.cli import copyrightMessage -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _construct_csv( @@ -120,7 +120,7 @@ def new_way( + "`gandlf_constructCSV` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_ConstructCSV", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/debug_info.py b/GANDLF/entrypoints/debug_info.py index e5f2d8a25..a179513a1 100644 --- a/GANDLF/entrypoints/debug_info.py +++ b/GANDLF/entrypoints/debug_info.py @@ -7,7 +7,7 @@ from GANDLF import __version__ from GANDLF.entrypoints import append_copyright_to_help from GANDLF.utils import get_git_hash -from GANDLF.utils import gandlf_logger_setup +from GANDLF.utils import logger_setup def _debug_info(): @@ -39,7 +39,7 @@ def new_way(): ) def old_way(): _debug_info() - gandlf_logger_setup() + logger_setup() if __name__ == "__main__": diff --git a/GANDLF/entrypoints/deploy.py b/GANDLF/entrypoints/deploy.py index dba542ee1..f24a25cf1 100644 --- a/GANDLF/entrypoints/deploy.py +++ b/GANDLF/entrypoints/deploy.py @@ -16,7 +16,7 @@ copyrightMessage, ) from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _deploy( @@ -158,7 +158,7 @@ def new_way( + "`gandlf_deploy` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Deploy", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/generate_metrics.py b/GANDLF/entrypoints/generate_metrics.py index d863ed44a..5d589a9f0 100644 --- a/GANDLF/entrypoints/generate_metrics.py +++ b/GANDLF/entrypoints/generate_metrics.py @@ -10,7 +10,7 @@ from GANDLF.cli import copyrightMessage from GANDLF.cli.generate_metrics import generate_metrics_dict from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _generate_metrics( @@ -82,7 +82,7 @@ def new_way( + "`gandlf_generateMetrics` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Metrics", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/optimize_model.py b/GANDLF/entrypoints/optimize_model.py index 4afbbc398..021f4d65f 100644 --- a/GANDLF/entrypoints/optimize_model.py +++ b/GANDLF/entrypoints/optimize_model.py @@ -9,7 +9,7 @@ from GANDLF.cli import copyrightMessage, post_training_model_optimization from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _optimize_model( @@ -61,7 +61,7 @@ def new_way( + "`gandlf_optimizeModel` script would be deprecated soon." 
) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_OptimizeModel", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/patch_miner.py b/GANDLF/entrypoints/patch_miner.py index 44e7f4239..58b041129 100644 --- a/GANDLF/entrypoints/patch_miner.py +++ b/GANDLF/entrypoints/patch_miner.py @@ -10,7 +10,7 @@ from GANDLF.cli.patch_extraction import patch_extraction from GANDLF.cli import copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _mine_patches(input_path: str, output_dir: str, config: Optional[str]): @@ -56,7 +56,7 @@ def new_way(input_csv: str, output_dir: str, config: Optional[str]): + "`gandlf_patchMiner` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_PatchMiner", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/preprocess.py b/GANDLF/entrypoints/preprocess.py index eccb00fb4..0644b1d53 100644 --- a/GANDLF/entrypoints/preprocess.py +++ b/GANDLF/entrypoints/preprocess.py @@ -8,7 +8,7 @@ from deprecated import deprecated from GANDLF.cli import preprocess_and_save, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _preprocess( @@ -111,7 +111,7 @@ def new_way( + "`gandlf_preprocess` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_Preprocess", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/recover_config.py b/GANDLF/entrypoints/recover_config.py index 36bed0070..6168b2ad1 100644 --- a/GANDLF/entrypoints/recover_config.py +++ b/GANDLF/entrypoints/recover_config.py @@ -9,7 +9,7 @@ from GANDLF.cli import copyrightMessage, recover_config from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _recover_config(model_dir: Optional[str], mlcube: bool, output_file: str): @@ -64,7 +64,7 @@ def new_way(model_dir, mlcube, output_file): + "`gandlf_recoverConfig` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_RecoverConfig", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/run.py b/GANDLF/entrypoints/run.py index 5c11a7091..f974acdc1 100644 --- a/GANDLF/entrypoints/run.py +++ b/GANDLF/entrypoints/run.py @@ -14,7 +14,7 @@ from GANDLF import version from GANDLF.cli import main_run, copyrightMessage from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _run( @@ -185,7 +185,7 @@ def new_way( + "`gandlf_run` script would be deprecated soon." 
) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/split_csv.py b/GANDLF/entrypoints/split_csv.py index eecda7c6c..7261e5fe8 100644 --- a/GANDLF/entrypoints/split_csv.py +++ b/GANDLF/entrypoints/split_csv.py @@ -12,7 +12,7 @@ from GANDLF.cli import copyrightMessage, split_data_and_save_csvs from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _split_csv(input_csv: str, output_dir: str, config_path: Optional[str]): @@ -65,7 +65,7 @@ def new_way(input_csv: str, output_dir: str, config: Optional[str]): + "`gandlf_splitCSV` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() parser = argparse.ArgumentParser( prog="GANDLF_SplitCSV", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/entrypoints/verify_install.py b/GANDLF/entrypoints/verify_install.py index 549ad2ad8..970b78a6f 100644 --- a/GANDLF/entrypoints/verify_install.py +++ b/GANDLF/entrypoints/verify_install.py @@ -6,7 +6,7 @@ from deprecated import deprecated from GANDLF.entrypoints import append_copyright_to_help -from GANDLF.utils.gandlf_logger import gandlf_logger_setup +from GANDLF.utils import logger_setup def _verify_install(): @@ -43,7 +43,7 @@ def new_way(): + "`gandlf_verifyInstall` script would be deprecated soon." ) def old_way(): - gandlf_logger_setup() + logger_setup() argparse.ArgumentParser( prog="GANDLF_VerifyInstall", formatter_class=argparse.RawTextHelpFormatter, diff --git a/GANDLF/logging_config.yaml b/GANDLF/logging_config.yaml index 382a29eb6..d569aa90f 100644 --- a/GANDLF/logging_config.yaml +++ b/GANDLF/logging_config.yaml @@ -18,7 +18,7 @@ filters: (): logging.Filter name: "py.warnings" info_only_filter: - (): GANDLF.utils.gandlf_logger.InfoOnlyFilter + (): GANDLF.utils.gandlf_logging.InfoOnlyFilter handlers: stdoutHandler: # only display info level class: logging.StreamHandler diff --git a/GANDLF/utils/__init__.py b/GANDLF/utils/__init__.py index 2466b1435..4c2233153 100644 --- a/GANDLF/utils/__init__.py +++ b/GANDLF/utils/__init__.py @@ -68,4 +68,4 @@ ) from .data_splitter import split_data -from .gandlf_logger import gandlf_logger_setup, InfoOnlyFilter +from .gandlf_logging import logger_setup, InfoOnlyFilter diff --git a/GANDLF/utils/gandlf_logger.py b/GANDLF/utils/gandlf_logging.py similarity index 95% rename from GANDLF/utils/gandlf_logger.py rename to GANDLF/utils/gandlf_logging.py index b7370b365..5360e64bf 100644 --- a/GANDLF/utils/gandlf_logger.py +++ b/GANDLF/utils/gandlf_logging.py @@ -26,7 +26,7 @@ def _save_logs_in_file(log_file, config_path): logging.config.dictConfig(config_dict) -def gandlf_logger_setup(log_file=None, config_path="logging_config.yaml") -> None: +def logger_setup(log_file=None, config_path="logging_config.yaml") -> None: """ It sets up the logger. Reads from logging_config. 
diff --git a/testing/test_full.py b/testing/test_full.py index 86a3c7049..8c8c19f75 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -3190,7 +3190,7 @@ def test_generic_data_split(): def test_generic_logging(capsys): print("52: Starting test for logging") log_file = "testing/gandlf.log" - gandlf_logger_setup(log_file) + logger_setup(log_file) message = "Testing logging" logging.debug(message) From 1364c227ea2d022ec7b291150874b85dadef3605 Mon Sep 17 00:00:00 2001 From: Veniamin Malefioudakis <58257722+benmalef@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:23:43 +0300 Subject: [PATCH 34/34] made some code changes --- GANDLF/entrypoints/cli_tool.py | 13 +------------ GANDLF/utils/gandlf_logging.py | 4 ++-- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/GANDLF/entrypoints/cli_tool.py b/GANDLF/entrypoints/cli_tool.py index 57fdc0ed8..668d36d60 100644 --- a/GANDLF/entrypoints/cli_tool.py +++ b/GANDLF/entrypoints/cli_tool.py @@ -7,24 +7,13 @@ from GANDLF import version -def setup_logging(loglevel): - logging.basicConfig(level=loglevel.upper()) - - @click.group() @click.version_option(version, "--version", "-v", message="GANDLF Version: %(version)s") -@click.option( - "--loglevel", - default="INFO", - help="Set the logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)", -) @click.pass_context # Pass the context to subcommands @append_copyright_to_help -def gandlf(ctx, loglevel): +def gandlf(ctx): """GANDLF command-line tool.""" ctx.ensure_object(dict) - ctx.obj["LOGLEVEL"] = loglevel - # setup_logging(loglevel) logger_setup() diff --git a/GANDLF/utils/gandlf_logging.py b/GANDLF/utils/gandlf_logging.py index 5360e64bf..9c376aaf0 100644 --- a/GANDLF/utils/gandlf_logging.py +++ b/GANDLF/utils/gandlf_logging.py @@ -19,7 +19,7 @@ def _create_log_file(log_file): log_file.write_text("Starting GaNDLF logging session \n") -def _save_logs_in_file(log_file, config_path): +def _configure_logging_with_logfile(log_file, config_path): with resources.open_text("GANDLF", config_path) as file: config_dict = yaml.safe_load(file) config_dict["handlers"]["rotatingFileHandler"]["filename"] = str(log_file) @@ -42,7 +42,7 @@ def logger_setup(log_file=None, config_path="logging_config.yaml") -> None: log_tmp_file = _create_tmp_log_file() logging.info(f"The logs are saved in {log_tmp_file}") _create_log_file(log_tmp_file) - _save_logs_in_file(log_tmp_file, config_path) + _configure_logging_with_logfile(log_tmp_file, config_path) class InfoOnlyFilter(logging.Filter):
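[Editorial note — a short usage sketch of the API this series converges on, based only on the functions shown above (`logger_setup` exported from `GANDLF.utils` after the rename, its `log_file=None` default falling back to a timestamped file under the system temp directory, and the stdout/stderr/rotating-file handlers from `logging_config.yaml`). The explicit log path is the one used by the test and is illustrative; as written in the series, an explicit path's parent directory is assumed to already exist.]

```python
import logging

from GANDLF.utils import logger_setup  # name introduced by the rename in PATCH 33

# Passing a path writes the rotating debug log there; calling logger_setup() with no
# argument instead creates a timestamped log file under <tmpdir>/.gandlf/.
logger_setup("testing/gandlf.log")

logger = logging.getLogger(__name__)
logger.info("shown on stdout (INFO-only handler) and written to the log file")
logger.debug("written to the rotating log file only")
logger.warning("sent to stderr as well as the log file")
```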