Skip to content
/ dvc Public
forked from iterative/dvc

Commit

Permalink
benedict: monkeypatch, performance improvements, bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry committed Sep 29, 2021
1 parent 612696c commit 1df213e
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 12 deletions.
26 changes: 26 additions & 0 deletions dvc/utils/_benedict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Rollbacks monkeypatching of `json.encoder` by benedict.
It monkeypatches json to use Python-based encoder instead of C-based
during import.
We rollback that monkeypatch by keeping reference to that C-based
encoder and reinstate them after importing benedict.
See: https://github.com/iterative/dvc/issues/6423
https://github.com/fabiocaccamo/python-benedict/issues/62
and the source of truth:
https://github.com/fabiocaccamo/python-benedict/blob/c98c471065/benedict/dicts/__init__.py#L282-L285
"""
from json import encoder

try:
c_make_encoder = encoder.c_make_encoder # type: ignore[attr-defined]
except AttributeError:
c_make_encoder = None


from benedict import benedict # noqa: E402

encoder.c_make_encoder = c_make_encoder # type: ignore[attr-defined]
# Please import benedict from here lazily
__all__ = ["benedict"]
13 changes: 2 additions & 11 deletions dvc/utils/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,10 @@ def chunk_dict(d: Dict[_KT, _VT], size: int = 1) -> List[Dict[_KT, _VT]]:

def merge_params(src: Dict, to_update: Dict) -> Dict:
"""Recursively merges params with benedict's syntax support in-place."""
from benedict import benedict
from ._benedict import benedict

data = benedict(src)
if src:
data.merge(to_update, overwrite=True)
else:
# NOTE: the following line may seem like an unnecessary duplication
# data.merge might affect the `src` if it's not empty, so we cannot
# check `if src` later, as it may have been mutated already.
data.merge(to_update, overwrite=True)
# benedict has issues keeping references to an empty dictionary
# see: https://github.com/iterative/dvc/issues/6374.
src.update(data)
data.merge(to_update, overwrite=True)
return src


Expand Down
2 changes: 1 addition & 1 deletion requirements/default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dpath>=2.0.2,<3
shtab>=1.3.4,<2
rich>=10.9.0
dictdiffer>=0.8.1
python-benedict>=0.21.1
python-benedict>=0.24.2
pyparsing==2.4.7
typing_extensions>=3.10.0.2
fsspec[http]>=2021.8.1
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/utils/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# pylint: disable=unidiomatic-typecheck
import json
from json import encoder

import pytest
from mock import create_autospec

Expand All @@ -8,6 +11,7 @@
merge_params,
validate,
)
from dvc.utils.serialize import dumps_yaml


class MyDict(dict):
Expand Down Expand Up @@ -143,6 +147,12 @@ def none_filter(result):
assert result == [1, 2]


def is_serializable(d):
json.dumps(d)
dumps_yaml(d)
return True


@pytest.mark.parametrize(
"changes, expected",
[
Expand Down Expand Up @@ -195,6 +205,8 @@ def test_merge_params(changes, expected):
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved
assert encoder.c_make_encoder
assert is_serializable(params)


@pytest.mark.parametrize(
Expand All @@ -203,10 +215,20 @@ def test_merge_params(changes, expected):
[{"foo": "baz"}, {"foo": "baz"}],
[{"foo": "baz", "goo": "bar"}, {"foo": "baz", "goo": "bar"}],
[{"foo[1]": ["baz", "goo"]}, {"foo": [None, ["baz", "goo"]]}],
[{"foo.bar": "baz"}, {"foo": {"bar": "baz"}}],
],
)
def test_merge_params_on_empty_src(changes, expected):
params = {}
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved
assert encoder.c_make_encoder
assert is_serializable(params)


def test_benedict_rollback_its_monkeypatch():
from dvc.utils._benedict import benedict

assert benedict({"foo": "foo"}) == {"foo": "foo"}
assert encoder.c_make_encoder

0 comments on commit 1df213e

Please sign in to comment.