Skip to content

Commit

Permalink
remove benedict dependency for verifying params
Browse files Browse the repository at this point in the history
Instead, the keypath util module from benedict is adapted into the dvc codebased and used
natively to verify that a keypath exists within a dict.
  • Loading branch information
mattlbeck committed Oct 1, 2021
1 parent c9eb1a6 commit 6170f16
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 3 deletions.
21 changes: 18 additions & 3 deletions dvc/repo/experiments/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dvc.path_info import PathInfo
from dvc.stage.monitor import CheckpointKilledError
from dvc.utils import relpath
from dvc.utils.keypath import parse_keypath

from .base import (
EXEC_APPLY,
Expand Down Expand Up @@ -358,11 +359,25 @@ def _get_new_params(self, src: Dict, to_update: Dict) -> List:
config file. to_update is a flat dict with keypath keys representing
param names.
"""
from benedict import benedict
new_params = []

b_src = benedict(src)
for param in to_update:
if self._is_new_param(src, param):
new_params.append(param)
return new_params

return list(set(to_update.keys()) - set(b_src.keypaths(indexes=True)))
def _is_new_param(self, src_dict: Dict, keypath: str) -> bool:
"""
Test whether the given keypath is in the given dict
"""
subdict = src_dict
for key in parse_keypath(keypath, "."):
try:
subdict = subdict[key]
except (KeyError, IndexError, TypeError) as e:
logger.debug(e)
return True
return False

def _format_new_params_msg(self, new_params, config_path):
"""Format an error message for when new parameters are identified"""
Expand Down
52 changes: 52 additions & 0 deletions dvc/utils/keypath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
python-benedict style keypath parsing.
Adapted from https://github.com/fabiocaccamo/python-benedict/blob/c\
98c471065ae84b4752a87f1bd63fe3987783663/benedict/dicts/keypath/keypath_util.py
"""

import re

KEY_INDEX_RE = r"(?:\[[\'\"]*(\-?[\d]+)[\'\"]*\]){1}$"


def parse_keypath(keypath, separator):
"""
Splits keys and indexes using the given separator:
eg. 'item[0].subitem[1]' -> ['item', 0, 'subitem', 1].
"""
keys1 = _split_keys(keypath, separator)
keys2 = []
for key in keys1:
keys2 += _split_key_indexes(key)
return keys2


def _split_key_indexes(key):
"""
Splits key indexes:
eg. 'item[0][1]' -> ['item', 0, 1].
"""
if "[" in key and key.endswith("]"):
keys = []
while True:
matches = re.findall(KEY_INDEX_RE, key)
if matches:
key = re.sub(KEY_INDEX_RE, "", key)
index = int(matches[0])
keys.insert(0, index)
# keys.insert(0, { keylist_util.INDEX_KEY:index })
continue
keys.insert(0, key)
break
return keys
return [key]


def _split_keys(keypath, separator):
"""
Splits keys using the given separator:
eg. 'item.subitem[1]' -> ['item', 'subitem[1]'].
"""
if separator:
return keypath.split(separator)
return [keypath]
1 change: 1 addition & 0 deletions tests/func/experiments/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def test_modify_params(tmp_dir, scm, dvc, mocker, changes, expected):
[["lorem.ipsum=3"], ["foo[0].bazar=3"]],
)
def test_add_params(tmp_dir, scm, dvc, changes):
"""Adding params through exp run should always result in an error"""
tmp_dir.gen("copy.py", COPY_SCRIPT)
tmp_dir.gen(
"params.yaml", "{foo: [bar: 1, baz: 2], goo: {bag: 3}, lorem: false}"
Expand Down

0 comments on commit 6170f16

Please sign in to comment.