Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

benedict: monkeypatch, performance improvements, bugfixes #6706

Merged
merged 1 commit into from
Oct 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions dvc/utils/_benedict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Rollbacks monkeypatching of `json.encoder` by benedict.

It monkeypatches json to use Python-based encoder instead of C-based
during import.

We rollback that monkeypatch by keeping reference to that C-based
encoder and reinstate them after importing benedict.
See: https://github.com/iterative/dvc/issues/6423
https://github.com/fabiocaccamo/python-benedict/issues/62
and the source of truth:
https://github.com/fabiocaccamo/python-benedict/blob/c98c471065/benedict/dicts/__init__.py#L282-L285
"""
from json import encoder

try:
c_make_encoder = encoder.c_make_encoder # type: ignore[attr-defined]
except AttributeError:
c_make_encoder = None


from benedict import benedict # noqa: E402

encoder.c_make_encoder = c_make_encoder # type: ignore[attr-defined]
# Please import benedict from here lazily
__all__ = ["benedict"]
13 changes: 2 additions & 11 deletions dvc/utils/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,10 @@ def chunk_dict(d: Dict[_KT, _VT], size: int = 1) -> List[Dict[_KT, _VT]]:

def merge_params(src: Dict, to_update: Dict) -> Dict:
"""Recursively merges params with benedict's syntax support in-place."""
from benedict import benedict
from ._benedict import benedict

data = benedict(src)
if src:
data.merge(to_update, overwrite=True)
else:
# NOTE: the following line may seem like an unnecessary duplication
# data.merge might affect the `src` if it's not empty, so we cannot
# check `if src` later, as it may have been mutated already.
data.merge(to_update, overwrite=True)
# benedict has issues keeping references to an empty dictionary
# see: https://github.com/iterative/dvc/issues/6374.
src.update(data)
Comment on lines -90 to -97
Copy link
Member Author

@skshetry skshetry Sep 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This else was for a bug in the benedict, and it has been fixed. To resolve this bug, that monkeypatch I mentioned was introduced 😢. We no longer need this else block, and tests should cover this if it breaks again in the future.

data.merge(to_update, overwrite=True)
return src


Expand Down
2 changes: 1 addition & 1 deletion requirements/default.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dpath>=2.0.2,<3
shtab>=1.3.4,<2
rich>=10.9.0
dictdiffer>=0.8.1
python-benedict>=0.21.1
python-benedict>=0.24.2
pyparsing==2.4.7
typing_extensions>=3.10.0.2
fsspec[http]>=2021.8.1
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/utils/test_collections.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# pylint: disable=unidiomatic-typecheck
import json
from json import encoder

import pytest
from mock import create_autospec

Expand All @@ -8,6 +11,7 @@
merge_params,
validate,
)
from dvc.utils.serialize import dumps_yaml


class MyDict(dict):
Expand Down Expand Up @@ -143,6 +147,12 @@ def none_filter(result):
assert result == [1, 2]


def is_serializable(d):
json.dumps(d)
dumps_yaml(d)
return True


@pytest.mark.parametrize(
"changes, expected",
[
Expand Down Expand Up @@ -195,6 +205,8 @@ def test_merge_params(changes, expected):
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved
assert encoder.c_make_encoder
assert is_serializable(params)


@pytest.mark.parametrize(
Expand All @@ -203,10 +215,20 @@ def test_merge_params(changes, expected):
[{"foo": "baz"}, {"foo": "baz"}],
[{"foo": "baz", "goo": "bar"}, {"foo": "baz", "goo": "bar"}],
[{"foo[1]": ["baz", "goo"]}, {"foo": [None, ["baz", "goo"]]}],
[{"foo.bar": "baz"}, {"foo": {"bar": "baz"}}],
Copy link
Member Author

@skshetry skshetry Sep 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Testing for scenario mentioned in #6476, coupled with is_serializable assertion below.

],
)
def test_merge_params_on_empty_src(changes, expected):
params = {}
merged = merge_params(params, changes)
assert merged == expected == params
assert params is merged # references should be preserved
assert encoder.c_make_encoder
assert is_serializable(params)


def test_benedict_rollback_its_monkeypatch():
from dvc.utils._benedict import benedict

assert benedict({"foo": "foo"}) == {"foo": "foo"}
assert encoder.c_make_encoder
Comment on lines +230 to +234
Copy link
Member Author

@skshetry skshetry Sep 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test for the monkeypatch that we rollback.