From 25af6ed4993e156863b353ca89f5e41788062175 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Thu, 15 Aug 2024 16:27:23 +0100 Subject: [PATCH 1/6] Add test to check runtime parameters are overwritten correctly when run env is also set Signed-off-by: Merel Theisen --- kedro/config/omegaconf_config.py | 4 ++- tests/config/test_omegaconf_config.py | 39 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index 51fae73906..0afec175bf 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -343,7 +343,9 @@ def load_and_merge_dir_config( # noqa: PLR0913 if not aggregate_config: return {} - if key == "parameters": + # Only merge in runtime parameters ones for the base env + is_base_env = conf_path.endswith(self.base_env) + if key == "parameters" and is_base_env: # Merge with runtime parameters only for "parameters" return OmegaConf.to_container( OmegaConf.merge(*aggregate_config, self.runtime_params), resolve=True diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index 1c0b26a6d7..a54f89b6cd 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -1068,6 +1068,45 @@ def test_runtime_params_resolution(self, tmp_path): # runtime params are resolved correctly in catalog assert conf["catalog"]["companies"]["type"] == runtime_params["dataset"]["type"] + def test_runtime_params_resolution_with_env(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + prod_params = tmp_path / "prod" / "parameters.yml" + runtime_params = { + "aaa": { + "bbb": { + "abb": "2011-11-11", + } + } + } + param_config = { + "aaa": { + "bbb": { + "aba": "2023-11-01", + "abb": "2023-11-01", + "abc": 14, + } + }, + "xyz": {"asdf": 123123}, + } + prod_param_config = {"def": {"gg": 123}} + _write_yaml(base_params, param_config) + _write_yaml(prod_params, prod_param_config) + conf = OmegaConfigLoader( + tmp_path, + base_env=_BASE_ENV, + default_run_env="prod", + runtime_params=runtime_params, + ) + + expected_parameters = { + "aaa": {"bbb": {"aba": "2023-11-01", "abb": "2011-11-11", "abc": 14}}, + "xyz": {"asdf": 123123}, + "def": {"gg": 123}, + } + + # runtime parameters are resolved correctly across parameter files from different environments + assert conf["parameters"] == expected_parameters + def test_runtime_params_missing_default(self, tmp_path): base_params = tmp_path / _BASE_ENV / "parameters.yml" runtime_params = { From c0301daab27b7cb45c79fb69bf5f9a2159210c0d Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Fri, 16 Aug 2024 11:52:04 +0100 Subject: [PATCH 2/6] Update release notes Signed-off-by: Merel Theisen --- RELEASE.md | 1 + kedro/config/omegaconf_config.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 4e72356aa5..21b1f875a4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,6 +6,7 @@ ## Bug fixes and other changes * Moved `_find_run_command()` and `_find_run_command_in_plugins()` from `__main__.py` in the project template to the framework itself. * Fixed a bug where `%load_node` breaks with multi-lines import statements. +* Fixed runtime parameters resolution in `OmegaConfigLoader` by only loading them once. ## Breaking changes to the API diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index 0afec175bf..a197f25788 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -343,7 +343,7 @@ def load_and_merge_dir_config( # noqa: PLR0913 if not aggregate_config: return {} - # Only merge in runtime parameters ones for the base env + # Only merge in runtime parameters once for the base env is_base_env = conf_path.endswith(self.base_env) if key == "parameters" and is_base_env: # Merge with runtime parameters only for "parameters" From ff6ce75d3351ff9429c8286fb8883b61dd794b85 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Mon, 19 Aug 2024 14:49:55 +0100 Subject: [PATCH 3/6] Add extra test case, revert changes + use soft merge in test instead Signed-off-by: Merel Theisen --- kedro/config/omegaconf_config.py | 4 +-- tests/config/test_omegaconf_config.py | 41 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/kedro/config/omegaconf_config.py b/kedro/config/omegaconf_config.py index a197f25788..51fae73906 100644 --- a/kedro/config/omegaconf_config.py +++ b/kedro/config/omegaconf_config.py @@ -343,9 +343,7 @@ def load_and_merge_dir_config( # noqa: PLR0913 if not aggregate_config: return {} - # Only merge in runtime parameters once for the base env - is_base_env = conf_path.endswith(self.base_env) - if key == "parameters" and is_base_env: + if key == "parameters": # Merge with runtime parameters only for "parameters" return OmegaConf.to_container( OmegaConf.merge(*aggregate_config, self.runtime_params), resolve=True diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index a54f89b6cd..818ea1c5ee 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -1096,6 +1096,7 @@ def test_runtime_params_resolution_with_env(self, tmp_path): base_env=_BASE_ENV, default_run_env="prod", runtime_params=runtime_params, + merge_strategy={"parameters": "soft"}, ) expected_parameters = { @@ -1107,6 +1108,46 @@ def test_runtime_params_resolution_with_env(self, tmp_path): # runtime parameters are resolved correctly across parameter files from different environments assert conf["parameters"] == expected_parameters + def test_runtime_params_resolution_with_env2(self, tmp_path): + base_params = tmp_path / _BASE_ENV / "parameters.yml" + prod_params = tmp_path / "prod" / "parameters.yml" + runtime_params = {"data_shift": 3} + param_config = { + "model_options": { + "test_size": 0.2, + "random_state": 3, + "features": ["engines"], + } + } + prod_param_config = { + "model_options": { + "test_size": 0.2, + "random_state": 3, + "features": ["engines"], + }, + "data_shift": 1, + } + _write_yaml(base_params, param_config) + _write_yaml(prod_params, prod_param_config) + conf = OmegaConfigLoader( + tmp_path, + base_env=_BASE_ENV, + default_run_env="prod", + runtime_params=runtime_params, + ) + + expected_parameters = { + "model_options": { + "test_size": 0.2, + "random_state": 3, + "features": ["engines"], + }, + "data_shift": 3, + } + + # runtime parameters are resolved correctly across parameter files from different environments + assert conf["parameters"] == expected_parameters + def test_runtime_params_missing_default(self, tmp_path): base_params = tmp_path / _BASE_ENV / "parameters.yml" runtime_params = { From 7b0a5d27787c3b3e3cff0fb77365d88bb90fbf07 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Wed, 21 Aug 2024 11:23:44 +0100 Subject: [PATCH 4/6] Add clarifications in docs explaining how runtime parameter resolution works. Signed-off-by: Merel Theisen --- RELEASE.md | 2 +- docs/source/configuration/advanced_configuration.md | 3 ++- docs/source/configuration/parameters.md | 3 ++- tests/config/test_omegaconf_config.py | 7 +++++-- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 21b1f875a4..781e42f095 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -6,11 +6,11 @@ ## Bug fixes and other changes * Moved `_find_run_command()` and `_find_run_command_in_plugins()` from `__main__.py` in the project template to the framework itself. * Fixed a bug where `%load_node` breaks with multi-lines import statements. -* Fixed runtime parameters resolution in `OmegaConfigLoader` by only loading them once. ## Breaking changes to the API ## Documentation changes +* Add clarifications in docs explaining how runtime parameter resolution works. ## Community contributions diff --git a/docs/source/configuration/advanced_configuration.md b/docs/source/configuration/advanced_configuration.md index 4d547e619a..914f5f44ae 100644 --- a/docs/source/configuration/advanced_configuration.md +++ b/docs/source/configuration/advanced_configuration.md @@ -322,7 +322,8 @@ Note that you can only use the resolver in `credentials.yml` and not in catalog ``` ### How to change the merge strategy used by `OmegaConfigLoader` -By default, `OmegaConfigLoader` merges configuration [in different environments](configuration_basics.md#configuration-environments) in a destructive way. This means that whatever configuration resides in your overriding environment (`local` by default) takes precedence when the same top-level key is present in the base and overriding environment. Any configuration for that key **besides that given in the overriding environment** is discarded. +By default, `OmegaConfigLoader` merges configuration [in different environments](configuration_basics.md#configuration-environments) as well as runtime parameters in a destructive way. This means that whatever configuration resides in your overriding environment (`local` by default) takes precedence when the same top-level key is present in the base and overriding environment. Any configuration for that key **besides that given in the overriding environment** is discarded. +The same behaviour applies to runtime parameters overriding any configuration in the `base` environment. You can change the merge strategy for each configuration type in your project's `src//settings.py`. The accepted merging strategies are `soft` and `destructive`. ```python diff --git a/docs/source/configuration/parameters.md b/docs/source/configuration/parameters.md index 80abb7eece..a2925aee4c 100644 --- a/docs/source/configuration/parameters.md +++ b/docs/source/configuration/parameters.md @@ -117,7 +117,8 @@ Each key-value pair is split on the first equals sign. The following example is ```bash kedro run --params=param_key1=value1,param_key2=2.0 ``` -Values provided in the CLI take precedence and overwrite parameters specified in configuration files. +Values provided in the CLI take precedence and overwrite parameters specified in configuration files. By default, runtime parameters get merged destructively, meaning that any configuration for that key **besides that given in the runtime parameters** is discarded. +[This section describes how to change the merging strategy](#how-to-change-the-merge-strategy-used-by-omegaconfigloader). * Parameter keys are _always_ treated as strings. * Parameter values are converted to a float or an integer number if the corresponding conversion succeeds; otherwise, they are also treated as string. diff --git a/tests/config/test_omegaconf_config.py b/tests/config/test_omegaconf_config.py index 818ea1c5ee..101d9ea00d 100644 --- a/tests/config/test_omegaconf_config.py +++ b/tests/config/test_omegaconf_config.py @@ -1068,7 +1068,9 @@ def test_runtime_params_resolution(self, tmp_path): # runtime params are resolved correctly in catalog assert conf["catalog"]["companies"]["type"] == runtime_params["dataset"]["type"] - def test_runtime_params_resolution_with_env(self, tmp_path): + def test_runtime_params_resolution_with_soft_merge_base_env(self, tmp_path): + """Test that runtime_params get softly merged with the base environment when soft merge is set + for parameter merge""" base_params = tmp_path / _BASE_ENV / "parameters.yml" prod_params = tmp_path / "prod" / "parameters.yml" runtime_params = { @@ -1108,7 +1110,8 @@ def test_runtime_params_resolution_with_env(self, tmp_path): # runtime parameters are resolved correctly across parameter files from different environments assert conf["parameters"] == expected_parameters - def test_runtime_params_resolution_with_env2(self, tmp_path): + def test_runtime_params_resolution_default_run_env(self, tmp_path): + """Test that runtime_params overwrite merge with the default run environment""" base_params = tmp_path / _BASE_ENV / "parameters.yml" prod_params = tmp_path / "prod" / "parameters.yml" runtime_params = {"data_shift": 3} From 4b2fdffce945b049b39deccdeb28c775bc483677 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Wed, 21 Aug 2024 15:47:33 +0100 Subject: [PATCH 5/6] Add example of how runtime parameters will be merged Signed-off-by: Merel Theisen --- docs/source/configuration/parameters.md | 41 ++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/docs/source/configuration/parameters.md b/docs/source/configuration/parameters.md index a2925aee4c..c8593219f9 100644 --- a/docs/source/configuration/parameters.md +++ b/docs/source/configuration/parameters.md @@ -118,7 +118,46 @@ Each key-value pair is split on the first equals sign. The following example is kedro run --params=param_key1=value1,param_key2=2.0 ``` Values provided in the CLI take precedence and overwrite parameters specified in configuration files. By default, runtime parameters get merged destructively, meaning that any configuration for that key **besides that given in the runtime parameters** is discarded. -[This section describes how to change the merging strategy](#how-to-change-the-merge-strategy-used-by-omegaconfigloader). +[This section describes how to change the merging strategy](advanced_configuration.md#how-to-change-the-merge-strategy-used-by-omegaconfigloader). + +For example, if you have the following parameters in your `base` and `local` environments: + +```yml +# base/parameters.yml +model_options: + model_params: + learning_date: "2023-11-01" + training_date: "2023-11-01" + data_ratio: 14 + +data_options: + step_size: 123123 +``` + +```yml +# local/parameters.yml +features: + rate: 123 +``` + +And you provide the following parameter at runtime: + +```bash +kedro run --params="model_options.model_params.training_date=2011-11-11" +``` + +The final merged result will be: +```yml +model_options: + model_params: + training_date: "2011-11-11" + +data_options: + step_size: 123123 + +features: + rate: 123 +``` * Parameter keys are _always_ treated as strings. * Parameter values are converted to a float or an integer number if the corresponding conversion succeeds; otherwise, they are also treated as string. From 14db04f0091f77a72428434d2d22ccc2cdb23fa0 Mon Sep 17 00:00:00 2001 From: Merel Theisen Date: Wed, 21 Aug 2024 15:55:42 +0100 Subject: [PATCH 6/6] Fix docs Signed-off-by: Merel Theisen --- docs/source/configuration/parameters.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/configuration/parameters.md b/docs/source/configuration/parameters.md index c8593219f9..16d1514fec 100644 --- a/docs/source/configuration/parameters.md +++ b/docs/source/configuration/parameters.md @@ -122,7 +122,7 @@ Values provided in the CLI take precedence and overwrite parameters specified in For example, if you have the following parameters in your `base` and `local` environments: -```yml +```yaml # base/parameters.yml model_options: model_params: @@ -134,7 +134,7 @@ data_options: step_size: 123123 ``` -```yml +```yaml # local/parameters.yml features: rate: 123 @@ -147,7 +147,7 @@ kedro run --params="model_options.model_params.training_date=2011-11-11" ``` The final merged result will be: -```yml +```yaml model_options: model_params: training_date: "2011-11-11"