diff --git a/dvc/utils/_benedict.py b/dvc/utils/_benedict.py new file mode 100644 index 0000000000..219c96e674 --- /dev/null +++ b/dvc/utils/_benedict.py @@ -0,0 +1,26 @@ +""" +Rollbacks monkeypatching of `json.encoder` by benedict. + +It monkeypatches json to use Python-based encoder instead of C-based +during import. + +We rollback that monkeypatch by keeping reference to that C-based +encoder and reinstate them after importing benedict. +See: https://github.com/iterative/dvc/issues/6423 + https://github.com/fabiocaccamo/python-benedict/issues/62 +and the source of truth: +https://github.com/fabiocaccamo/python-benedict/blob/c98c471065/benedict/dicts/__init__.py#L282-L285 +""" +from json import encoder + +try: + c_make_encoder = encoder.c_make_encoder # type: ignore[attr-defined] +except AttributeError: + c_make_encoder = None + + +from benedict import benedict # noqa: E402 + +encoder.c_make_encoder = c_make_encoder # type: ignore[attr-defined] +# Please import benedict from here lazily +__all__ = ["benedict"] diff --git a/dvc/utils/collections.py b/dvc/utils/collections.py index 71bbde03b5..f05cdb8f52 100644 --- a/dvc/utils/collections.py +++ b/dvc/utils/collections.py @@ -82,19 +82,10 @@ def chunk_dict(d: Dict[_KT, _VT], size: int = 1) -> List[Dict[_KT, _VT]]: def merge_params(src: Dict, to_update: Dict) -> Dict: """Recursively merges params with benedict's syntax support in-place.""" - from benedict import benedict + from ._benedict import benedict data = benedict(src) - if src: - data.merge(to_update, overwrite=True) - else: - # NOTE: the following line may seem like an unnecessary duplication - # data.merge might affect the `src` if it's not empty, so we cannot - # check `if src` later, as it may have been mutated already. - data.merge(to_update, overwrite=True) - # benedict has issues keeping references to an empty dictionary - # see: https://github.com/iterative/dvc/issues/6374. - src.update(data) + data.merge(to_update, overwrite=True) return src diff --git a/requirements/default.txt b/requirements/default.txt index b5805bbaa7..86c819ca25 100644 --- a/requirements/default.txt +++ b/requirements/default.txt @@ -37,7 +37,7 @@ dpath>=2.0.2,<3 shtab>=1.3.4,<2 rich>=10.9.0 dictdiffer>=0.8.1 -python-benedict>=0.21.1 +python-benedict>=0.24.2 pyparsing==2.4.7 typing_extensions>=3.10.0.2 fsspec[http]>=2021.8.1 diff --git a/tests/unit/utils/test_collections.py b/tests/unit/utils/test_collections.py index 1d032c3b55..50dcbcc518 100644 --- a/tests/unit/utils/test_collections.py +++ b/tests/unit/utils/test_collections.py @@ -1,4 +1,7 @@ # pylint: disable=unidiomatic-typecheck +import json +from json import encoder + import pytest from mock import create_autospec @@ -8,6 +11,7 @@ merge_params, validate, ) +from dvc.utils.serialize import dumps_yaml class MyDict(dict): @@ -143,6 +147,12 @@ def none_filter(result): assert result == [1, 2] +def is_serializable(d): + json.dumps(d) + dumps_yaml(d) + return True + + @pytest.mark.parametrize( "changes, expected", [ @@ -195,6 +205,8 @@ def test_merge_params(changes, expected): merged = merge_params(params, changes) assert merged == expected == params assert params is merged # references should be preserved + assert encoder.c_make_encoder + assert is_serializable(params) @pytest.mark.parametrize( @@ -203,6 +215,7 @@ def test_merge_params(changes, expected): [{"foo": "baz"}, {"foo": "baz"}], [{"foo": "baz", "goo": "bar"}, {"foo": "baz", "goo": "bar"}], [{"foo[1]": ["baz", "goo"]}, {"foo": [None, ["baz", "goo"]]}], + [{"foo.bar": "baz"}, {"foo": {"bar": "baz"}}], ], ) def test_merge_params_on_empty_src(changes, expected): @@ -210,3 +223,12 @@ def test_merge_params_on_empty_src(changes, expected): merged = merge_params(params, changes) assert merged == expected == params assert params is merged # references should be preserved + assert encoder.c_make_encoder + assert is_serializable(params) + + +def test_benedict_rollback_its_monkeypatch(): + from dvc.utils._benedict import benedict + + assert benedict({"foo": "foo"}) == {"foo": "foo"} + assert encoder.c_make_encoder