lincc-frameworks · mtauraso · Sep 20, 2024 · Sep 19, 2024 · Sep 20, 2024 · drewoldag
diff --git a/src/fibad/config_utils.py b/src/fibad/config_utils.py
@@ -1,3 +1,4 @@
+import logging
 from pathlib import Path
 from typing import Union
 
@@ -6,6 +7,160 @@
 DEFAULT_CONFIG_FILEPATH = Path(__file__).parent.resolve() / "fibad_default_config.toml"
 DEFAULT_USER_CONFIG_FILEPATH = Path.cwd() / "fibad_config.toml"
 
+logger = logging.getLogger(__name__)
+
+
+class ConfigDict(dict):
+    """The purpose of this class is to ensure key errors on config dictionaries return something helpful,
+    and to discourage mutation actions on config dictionaries that should not happen at runtime.
+    """
+
+    # TODO: Should there be some sort of "bake" method which occurs after config processing, and
+    # percolates down to nested ConfigDicts and prevents __setitem__ and other mutations of dictionary
+    # values? i.e. a method to make a config dictionary fully immutable (or very difficult/annoying to
+    # mutate) before we pass control to possibly external module code that is relying on the dictionary
+    # to be static throughout the run.
+
+    __slots__ = ()  # we don't need __dict__ on this object at all.
+
+    def __init__(self, map: dict, **kwargs):
+        """Copy `map` (plus keyword items), recursively wrapping nested plain dicts as ConfigDicts."""
+        super().__init__(map, **kwargs)
+        for key in self:
+            # Values are rebound but no keys are added or removed, so this is safe while iterating.
+            if isinstance(self[key], dict) and not isinstance(self[key], ConfigDict):
+                self[key] = ConfigDict(map=self[key])
+
+    def __missing__(self, key):  # dict hook: invoked by self[key] when key is absent
+        msg = f"Accessed configuration key/section {key} which has not been defined. "
+        msg += f"All configuration keys and sections must be defined in {DEFAULT_CONFIG_FILEPATH}"
+        logger.fatal(msg)
+        raise RuntimeError(msg)
+
+    def get(self, key, default=None):
+        """Nonfunctional stub of dict.get() which errors always"""
+        msg = f"ConfigDict.get({key},{default}) called. "
+        msg += "Please index config dictionaries with [] or __getitem__() only. "
+        msg += f"Configuration keys and sections must be defined in {DEFAULT_CONFIG_FILEPATH}"
+        logger.fatal(msg)
+        raise RuntimeError(msg)
+
+    def __delitem__(self, key):  # blocks `del config[key]`
+        raise RuntimeError("Removing keys or sections from a ConfigDict using del is not supported")
+
+    def pop(self, key, default=None):
+        """Nonfunctional stub of dict.pop() which errors always, with or without a default argument"""
+        raise RuntimeError("Removing keys or sections from a ConfigDict using pop() is not supported")
+
+    def popitem(self):
+        """Nonfunctional stub of dict.popitem() which errors always"""
+        raise RuntimeError("Removing keys or sections from a ConfigDict using popitem() is not supported")
+
+    def clear(self):
+        """Nonfunctional stub of dict.clear() which errors always"""
+        raise RuntimeError("Removing keys or sections from a ConfigDict using clear() is not supported")
+
+
+def validate_runtime_config(runtime_config: ConfigDict):
+    """Check that every key in a finished runtime config also has a packaged default.
+
+    Call this once all sources of config info have been combined in `runtime_config` and no further
+    config altering operations will be performed, i.e. right before it is used for science calculations.
+
+    Parameters
+    ----------
+    runtime_config : ConfigDict
+        The fully combined runtime config dictionary to check.
+
+    Raises
+    ------
+    RuntimeError
+        Raised if any key or section in the runtime config does not have a default defined
+    """
+    # The packaged default config file is the reference for which keys and sections may exist.
+    packaged_defaults = _read_runtime_config(DEFAULT_CONFIG_FILEPATH)
+    _validate_runtime_config(runtime_config, packaged_defaults)
+
+
+def _validate_runtime_config(runtime_config: ConfigDict, default_config: ConfigDict):
+    """Recursive helper for validate_runtime_config.
+
+    The two arguments passed in must represent the same nesting level of the runtime config and all
+    default config parameters respectively. Only key presence is checked; values are not compared.
+
+    Parameters
+    ----------
+    runtime_config : ConfigDict
+        Nested config dictionary representing the runtime config.
+    default_config : ConfigDict
+        Nested config dictionary representing the defaults
+
+    Raises
+    ------
+    RuntimeError
+        Raised if any config that exists in the runtime config does not have a default defined in
+        default_config
+    """
+    for key in runtime_config:
+        if key not in default_config:
+            msg = f"Runtime config contains key or section {key} which has no default defined. "
+            msg += f"All configuration keys and sections must be defined in {DEFAULT_CONFIG_FILEPATH}"
+            raise RuntimeError(msg)
+
+        if isinstance(runtime_config[key], dict):  # a section: descend one nesting level in both
+            _validate_runtime_config(runtime_config[key], default_config[key])
+
+
+def _read_runtime_config(config_filepath: Union[Path, str] = DEFAULT_CONFIG_FILEPATH) -> ConfigDict:
+    """Read a single toml file (decoded as UTF-8, the encoding TOML mandates) into a config dictionary
+
+    Parameters
+    ----------
+    config_filepath : Union[Path, str], optional
+        What file is to be read, by default DEFAULT_CONFIG_FILEPATH
+
+    Returns
+    -------
+    ConfigDict
+        The contents of that toml file as nested ConfigDicts
+    """
+    with open(config_filepath, "r", encoding="utf-8") as f:  # don't depend on the platform default
+        parsed_dict = toml.load(f)
+    return ConfigDict(parsed_dict)
+
+
+def resolve_runtime_config(runtime_config_filepath: Union[Path, str, None] = None) -> Path:
+    """Pick the config file that runtime configuration will actually be read from.
+
+    Candidates are tried in this order:
+
+    1) A runtime config file that was explicitly specified by the caller.
+    2) A file named "fibad_config.toml" in the current working directory, if no file was specified.
+    3) The packaged "fibad_default_config.toml" file, if no file was specified and the current working
+       directory has no "fibad_config.toml".
+
+    Parameters
+    ----------
+    runtime_config_filepath : Union[Path, str, None], optional
+        Location of the supplied config file, by default None
+
+    Returns
+    -------
+    Path
+        Path to the configuration file ultimately used for config resolution. When we fall back to the
+        package supplied default config file, the Path to that file is returned.
+    """
+    # An explicitly supplied location always wins; a string is converted to a Path first.
+    if runtime_config_filepath is not None:
+        if isinstance(runtime_config_filepath, str):
+            return Path(runtime_config_filepath)
+        return runtime_config_filepath
+
+    # Nothing was supplied: prefer a named config in cwd, otherwise use the packaged defaults.
+    if DEFAULT_USER_CONFIG_FILEPATH.exists():
+        return DEFAULT_USER_CONFIG_FILEPATH
+    return DEFAULT_CONFIG_FILEPATH
+
 
 def get_runtime_config(
     runtime_config_filepath: Union[Path, str, None] = None,
@@ -33,24 +188,14 @@
         The parsed runtime configuration.
     """
 
-    if isinstance(runtime_config_filepath, str):
-        runtime_config_filepath = Path(runtime_config_filepath)
-
-    with open(default_config_filepath, "r") as f:
-        default_runtime_config = toml.load(f)
-
-    # If a named config exists in cwd, and no config specified on cmdline, use cwd.
-    if runtime_config_filepath is None and DEFAULT_USER_CONFIG_FILEPATH.exists():
-        runtime_config_filepath = DEFAULT_USER_CONFIG_FILEPATH
+    runtime_config_filepath = resolve_runtime_config(runtime_config_filepath)
+    default_runtime_config = _read_runtime_config(default_config_filepath)
 
-    if runtime_config_filepath is not None:
+    if runtime_config_filepath is not DEFAULT_CONFIG_FILEPATH:
         if not runtime_config_filepath.exists():
             raise FileNotFoundError(f"Runtime configuration file not found: {runtime_config_filepath}")
-
-        with open(runtime_config_filepath, "r") as f:
-            users_runtime_config = toml.load(f)
-
-            final_runtime_config = merge_configs(default_runtime_config, users_runtime_config)
+        users_runtime_config = _read_runtime_config(runtime_config_filepath)
+        final_runtime_config = merge_configs(default_runtime_config, users_runtime_config)
     else:
         final_runtime_config = default_runtime_config
 
@@ -80,7 +225,7 @@
     final_config = default_config.copy()
     for k, v in user_config.items():
         if k in final_config and isinstance(final_config[k], dict) and isinstance(v, dict):
-            final_config[k] = merge_configs(default_config.get(k, {}), v)
+            final_config[k] = merge_configs(default_config[k], v)
         else:
             final_config[k] = v
 

diff --git a/src/fibad/data_loaders/data_loader_registry.py b/src/fibad/data_loaders/data_loader_registry.py
@@ -36,7 +36,7 @@
         If no data loader was specified in the runtime configuration.
     """
 
-    data_loader_config = runtime_config.get("data_loader", {})
+    data_loader_config = runtime_config["data_loader"]
     data_loader_cls = None
 
     try:

diff --git a/src/fibad/data_loaders/example_cifar_data_loader.py b/src/fibad/data_loaders/example_cifar_data_loader.py
@@ -9,8 +9,8 @@
 
 @fibad_data_loader
 class CifarDataLoader:
-    def __init__(self, data_loader_config):
-        self.config = data_loader_config
+    def __init__(self, config):
+        self.config = config
 
     def shape(self):
         return (3, 32, 32)
@@ -31,13 +31,13 @@
         )
 
         return torchvision.datasets.CIFAR10(
-            root=self.config.get("path", "./data"), train=True, download=True, transform=transform
+            root=self.config["general"]["data_dir"], train=True, download=True, transform=transform
         )
 
     def data_loader(self, data_set):
         return torch.utils.data.DataLoader(
             data_set,
-            batch_size=self.config.get("batch_size", 4),
-            shuffle=self.config.get("shuffle", True),
-            num_workers=self.config.get("num_workers", 2),
+            batch_size=self.config["data_loader"]["batch_size"],
+            shuffle=self.config["data_loader"]["shuffle"],
+            num_workers=self.config["data_loader"]["num_workers"],
         )
diff --git a/src/fibad/data_loaders/hsc_data_loader.py b/src/fibad/data_loaders/hsc_data_loader.py
@@ -18,8 +18,8 @@
 
 @fibad_data_loader
 class HSCDataLoader:
-    def __init__(self, data_loader_config):
-        self.config = data_loader_config
+    def __init__(self, config):
+        self.config = config
         self._data_set = self.data_set()
 
     def get_data_loader(self):
@@ -37,27 +37,28 @@
         if self.__dict__.get("_data_set", None) is not None:
             return self._data_set
 
-        self.config.get("path", "./data")
-
         # TODO: What will be a reasonable set of tranformations?
         # For now tanh all the values so they end up in [-1,1]
         # Another option might be sinh, but we'd need to mess with the example autoencoder module
         # Because it goes from unbounded NN output space -> [-1,1] with tanh in its decode step.
         transform = Lambda(lambd=np.tanh)
 
+        crop_to = self.config["data_loader"]["crop_to"]
+        filters = self.config["data_loader"]["filters"]
+
         return HSCDataSet(
-            self.config.get("path", "./data"),
+            self.config["general"]["data_dir"],
             transform=transform,
-            cutout_shape=self.config.get("crop_to", None),
-            filters=self.config.get("filters", None),
+            cutout_shape=crop_to if crop_to else None,
+            filters=filters if filters else None,
         )
 
     def data_loader(self, data_set):
         return torch.utils.data.DataLoader(
             data_set,
-            batch_size=self.config.get("batch_size", 4),
-            shuffle=self.config.get("shuffle", True),
-            num_workers=self.config.get("num_workers", 2),
+            batch_size=self.config["data_loader"]["batch_size"],
+            shuffle=self.config["data_loader"]["shuffle"],
+            num_workers=self.config["data_loader"]["num_workers"],
         )
 
     def shape(self):