From 6170f16dcc4033667fbacb0d0bc13c60898f58c4 Mon Sep 17 00:00:00 2001 From: mattlbeck <17108752+mattlbeck@users.noreply.github.com> Date: Sat, 11 Sep 2021 22:15:27 +0100 Subject: [PATCH] remove benedict dependency for verifying params Instead, the keypath util module from benedict is adapted into the dvc codebased and used natively to verify that a keypath exists within a dict. --- dvc/repo/experiments/__init__.py | 21 +++++++-- dvc/utils/keypath.py | 52 ++++++++++++++++++++++ tests/func/experiments/test_experiments.py | 1 + 3 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 dvc/utils/keypath.py diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index ea4cdbc179..f605a56a02 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -15,6 +15,7 @@ from dvc.path_info import PathInfo from dvc.stage.monitor import CheckpointKilledError from dvc.utils import relpath +from dvc.utils.keypath import parse_keypath from .base import ( EXEC_APPLY, @@ -358,11 +359,25 @@ def _get_new_params(self, src: Dict, to_update: Dict) -> List: config file. to_update is a flat dict with keypath keys representing param names. """ - from benedict import benedict + new_params = [] - b_src = benedict(src) + for param in to_update: + if self._is_new_param(src, param): + new_params.append(param) + return new_params - return list(set(to_update.keys()) - set(b_src.keypaths(indexes=True))) + def _is_new_param(self, src_dict: Dict, keypath: str) -> bool: + """ + Test whether the given keypath is in the given dict + """ + subdict = src_dict + for key in parse_keypath(keypath, "."): + try: + subdict = subdict[key] + except (KeyError, IndexError, TypeError) as e: + logger.debug(e) + return True + return False def _format_new_params_msg(self, new_params, config_path): """Format an error message for when new parameters are identified""" diff --git a/dvc/utils/keypath.py b/dvc/utils/keypath.py new file mode 100644 index 0000000000..35f1b1fa4b --- /dev/null +++ b/dvc/utils/keypath.py @@ -0,0 +1,52 @@ +""" +python-benedict style keypath parsing. +Adapted from https://github.com/fabiocaccamo/python-benedict/blob/c\ +98c471065ae84b4752a87f1bd63fe3987783663/benedict/dicts/keypath/keypath_util.py +""" + +import re + +KEY_INDEX_RE = r"(?:\[[\'\"]*(\-?[\d]+)[\'\"]*\]){1}$" + + +def parse_keypath(keypath, separator): + """ + Splits keys and indexes using the given separator: + eg. 'item[0].subitem[1]' -> ['item', 0, 'subitem', 1]. + """ + keys1 = _split_keys(keypath, separator) + keys2 = [] + for key in keys1: + keys2 += _split_key_indexes(key) + return keys2 + + +def _split_key_indexes(key): + """ + Splits key indexes: + eg. 'item[0][1]' -> ['item', 0, 1]. + """ + if "[" in key and key.endswith("]"): + keys = [] + while True: + matches = re.findall(KEY_INDEX_RE, key) + if matches: + key = re.sub(KEY_INDEX_RE, "", key) + index = int(matches[0]) + keys.insert(0, index) + # keys.insert(0, { keylist_util.INDEX_KEY:index }) + continue + keys.insert(0, key) + break + return keys + return [key] + + +def _split_keys(keypath, separator): + """ + Splits keys using the given separator: + eg. 'item.subitem[1]' -> ['item', 'subitem[1]']. + """ + if separator: + return keypath.split(separator) + return [keypath] diff --git a/tests/func/experiments/test_experiments.py b/tests/func/experiments/test_experiments.py index 54d8fa828d..badd5e12f6 100644 --- a/tests/func/experiments/test_experiments.py +++ b/tests/func/experiments/test_experiments.py @@ -150,6 +150,7 @@ def test_modify_params(tmp_dir, scm, dvc, mocker, changes, expected): [["lorem.ipsum=3"], ["foo[0].bazar=3"]], ) def test_add_params(tmp_dir, scm, dvc, changes): + """Adding params through exp run should always result in an error""" tmp_dir.gen("copy.py", COPY_SCRIPT) tmp_dir.gen( "params.yaml", "{foo: [bar: 1, baz: 2], goo: {bag: 3}, lorem: false}"