Skip to content

Commit

Permalink
Merge pull request #380 from seperman/dev
Browse files Browse the repository at this point in the history
6.3.1
  • Loading branch information
seperman authored Jul 6, 2023
2 parents 75e1edd + 8951d92 commit d52b2da
Show file tree
Hide file tree
Showing 17 changed files with 427 additions and 42 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ jobs:
if: matrix.python-version != 3.7
run: pip install -r requirements-dev.txt
- name: Lint with flake8
if: matrix.python-version == 3.10
if: matrix.python-version == 3.11
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 deepdiff --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 deepdiff --count --exit-zero --max-complexity=26 --max-line-length=250 --statistics
- name: Test with pytest and get the coverage
if: matrix.python-version == 3.10
if: matrix.python-version == 3.11
run: |
pytest --cov-report=xml --cov=deepdiff tests/ --runslow
- name: Test with pytest and no coverage report
if: matrix.python-version != 3.10
if: matrix.python-version != 3.11
run: |
pytest
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
if: matrix.python-version == 3.10
uses: codecov/codecov-action@v3
if: matrix.python-version == 3.11
with:
file: ./coverage.xml
env_vars: OS,PYTHON
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ If you want to improve the performance of DeepDiff with certain functionalities

`pip install "deepdiff[optimize]"`

Install optional packages:
- [yaml](https://pypi.org/project/PyYAML/)
- [tomli](https://pypi.org/project/tomli/) (Python 3.10 and older) for reading TOML and [tomli-w](https://pypi.org/project/tomli-w/) for writing it
- [clevercsv](https://pypi.org/project/clevercsv/) for more robust CSV parsing
- [orjson](https://pypi.org/project/orjson/) for speed and memory optimized parsing
- [pydantic](https://pypi.org/project/pydantic/)


# Documentation

<https://zepworks.com/deepdiff/current/>
Expand Down
40 changes: 31 additions & 9 deletions deepdiff/deephash.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#!/usr/bin/env python
import inspect
import logging
from collections.abc import Iterable, MutableMapping
from collections import defaultdict
from hashlib import sha1, sha256
from pathlib import Path
from enum import Enum
from deepdiff.helper import (strings, numbers, times, unprocessed, not_hashed, add_to_frozen_set,
convert_item_or_items_into_set_else_none, get_doc,
Expand Down Expand Up @@ -308,17 +310,28 @@ def items(self):
def _prep_obj(self, obj, parent, parents_ids=EMPTY_FROZENSET, is_namedtuple=False):
"""prepping objects"""
original_type = type(obj) if not isinstance(obj, type) else obj
try:
if is_namedtuple:
obj = obj._asdict()
else:
obj = obj.__dict__
except AttributeError:

obj_to_dict_strategies = []
if is_namedtuple:
obj_to_dict_strategies.append(lambda o: o._asdict())
else:
obj_to_dict_strategies.append(lambda o: o.__dict__)

if hasattr(obj, "__slots__"):
obj_to_dict_strategies.append(lambda o: {i: getattr(o, i) for i in o.__slots__})
else:
obj_to_dict_strategies.append(lambda o: dict(inspect.getmembers(o, lambda m: not inspect.isroutine(m))))

for get_dict in obj_to_dict_strategies:
try:
obj = {i: getattr(obj, i) for i in obj.__slots__}
d = get_dict(obj)
break
except AttributeError:
self.hashes[UNPROCESSED_KEY].append(obj)
return (unprocessed, 0)
pass
else:
self.hashes[UNPROCESSED_KEY].append(obj)
return (unprocessed, 0)
obj = d

result, counts = self._prep_dict(obj, parent=parent, parents_ids=parents_ids,
print_as_attribute=True, original_type=original_type)
Expand Down Expand Up @@ -420,6 +433,12 @@ def _prep_iterable(self, obj, parent, parents_ids=EMPTY_FROZENSET):
def _prep_bool(self, obj):
    """Map the truthiness of ``obj`` onto the ``BoolObj`` marker values.

    Returns ``BoolObj.TRUE`` when ``obj`` is truthy, ``BoolObj.FALSE`` otherwise.
    """
    if obj:
        return BoolObj.TRUE
    return BoolObj.FALSE


def _prep_path(self, obj):
    """Build the string used to hash a path object.

    The class name of ``obj`` and ``obj`` itself are substituted, in that
    order, into the ``KEY_TO_VAL_STR`` template.
    """
    return KEY_TO_VAL_STR.format(obj.__class__.__name__, obj)


def _prep_number(self, obj):
type_ = "number" if self.ignore_numeric_type_changes else obj.__class__.__name__
if self.significant_digits is not None:
Expand Down Expand Up @@ -476,6 +495,9 @@ def _hash(self, obj, parent, parents_ids=EMPTY_FROZENSET):
ignore_encoding_errors=self.ignore_encoding_errors,
)

elif isinstance(obj, Path):
result = self._prep_path(obj)

elif isinstance(obj, times):
result = self._prep_datetime(obj)

Expand Down
23 changes: 18 additions & 5 deletions deepdiff/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
np_ndarray, np_array_factory, numpy_dtypes, get_doc,
not_found, numpy_dtype_string_to_type, dict_,
)
from deepdiff.path import _path_to_elements, _get_nested_obj, GET, GETATTR
from deepdiff.path import _path_to_elements, _get_nested_obj, _get_nested_obj_and_force, GET, GETATTR
from deepdiff.anyset import AnySet


Expand Down Expand Up @@ -70,6 +70,7 @@ def __init__(
safe_to_import=None,
serializer=pickle_dump,
verify_symmetry=False,
force=False,
):
if hasattr(deserializer, '__code__') and 'safe_to_import' in set(deserializer.__code__.co_varnames):
_deserializer = deserializer
Expand Down Expand Up @@ -104,6 +105,11 @@ def _deserializer(obj, safe_to_import=None):
self._numpy_paths = self.diff.pop('_numpy_paths', False)
self.serializer = serializer
self.deserializer = deserializer
self.force = force
if force:
self.get_nested_obj = _get_nested_obj_and_force
else:
self.get_nested_obj = _get_nested_obj
self.reset()

def __repr__(self):
Expand Down Expand Up @@ -162,7 +168,14 @@ def _get_elem_and_compare_to_old_value(self, obj, path_for_err_reporting, expect
current_old_value = getattr(obj, elem)
else:
raise DeltaError(INVALID_ACTION_WHEN_CALLING_GET_ELEM.format(action))
except (KeyError, IndexError, AttributeError, IndexError, TypeError) as e:
except (KeyError, IndexError, AttributeError, TypeError) as e:
if self.force:
forced_old_value = {}
if action == GET:
obj[elem] = forced_old_value
elif action == GETATTR:
setattr(obj, elem, forced_old_value)
return forced_old_value
current_old_value = not_found
if isinstance(path_for_err_reporting, (list, tuple)):
path_for_err_reporting = '.'.join([i[0] for i in path_for_err_reporting])
Expand Down Expand Up @@ -351,14 +364,14 @@ def _get_elements_and_details(self, path):
try:
elements = _path_to_elements(path)
if len(elements) > 1:
parent = _get_nested_obj(obj=self, elements=elements[:-2])
parent = self.get_nested_obj(obj=self, elements=elements[:-2])
parent_to_obj_elem, parent_to_obj_action = elements[-2]
obj = self._get_elem_and_compare_to_old_value(
obj=parent, path_for_err_reporting=path, expected_old_value=None,
elem=parent_to_obj_elem, action=parent_to_obj_action)
else:
parent = parent_to_obj_elem = parent_to_obj_action = None
obj = _get_nested_obj(obj=self, elements=elements[:-1])
obj = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1]
except Exception as e:
self._raise_or_log(UNABLE_TO_GET_ITEM_MSG.format(path, e))
Expand Down Expand Up @@ -458,7 +471,7 @@ def _do_set_item_removed(self):
def _do_set_or_frozenset_item(self, items, func):
for path, value in items.items():
elements = _path_to_elements(path)
parent = _get_nested_obj(obj=self, elements=elements[:-1])
parent = self.get_nested_obj(obj=self, elements=elements[:-1])
elem, action = elements[-1]
obj = self._get_elem_and_compare_to_old_value(
parent, path_for_err_reporting=path, expected_old_value=None, elem=elem, action=action)
Expand Down
12 changes: 8 additions & 4 deletions deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@
from collections import defaultdict
from itertools import zip_longest
from ordered_set import OrderedSet
from deepdiff.helper import (strings, bytes_type, numbers, uuids, times, ListItemRemovedOrAdded, notpresent,
from deepdiff.helper import (strings, bytes_type, numbers, uuids, datetimes, ListItemRemovedOrAdded, notpresent,
IndexedHash, unprocessed, add_to_frozen_set, basic_types,
convert_item_or_items_into_set_else_none, get_type,
convert_item_or_items_into_compiled_regexes_else_none,
type_is_subclass_of_type_group, type_in_type_group, get_doc,
number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans,
np_ndarray, np_floating, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer,
TEXT_VIEW, TREE_VIEW, DELTA_VIEW, detailed__dict__, add_root_to_paths,
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS)
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
PydanticBaseModel, )
from deepdiff.serialization import SerializationMixin
from deepdiff.distance import DistanceMixin
from deepdiff.model import (
Expand Down Expand Up @@ -452,7 +453,7 @@ def _skip_this(self, level):
if level_path not in self.include_paths:
skip = True
for prefix in self.include_paths:
if level_path.startswith(prefix):
if prefix in level_path or level_path in prefix:
skip = False
break
elif self.exclude_regex_paths and any(
Expand Down Expand Up @@ -1529,7 +1530,7 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
if isinstance(level.t1, strings):
self._diff_str(level, local_tree=local_tree)

elif isinstance(level.t1, times):
elif isinstance(level.t1, datetimes):
self._diff_datetimes(level, local_tree=local_tree)

elif isinstance(level.t1, uuids):
Expand All @@ -1550,6 +1551,9 @@ def _diff(self, level, parents_ids=frozenset(), _original_type=None, local_tree=
elif isinstance(level.t1, np_ndarray):
self._diff_numpy_array(level, parents_ids, local_tree=local_tree)

elif isinstance(level.t1, PydanticBaseModel):
self._diff_obj(level, parents_ids, local_tree=local_tree)

elif isinstance(level.t1, Iterable):
self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree)

Expand Down
10 changes: 10 additions & 0 deletions deepdiff/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class np_type:
pass


# Empty placeholder class; it is bound to the name PydanticBaseModel when
# importing pydantic's BaseModel raises ImportError.
class pydantic_base_model_type:
pass


try:
import numpy as np
except ImportError: # pragma: no cover. The case without Numpy is tested locally only.
Expand Down Expand Up @@ -84,6 +88,12 @@ class np_type:
item.__name__: item for item in numpy_dtypes
}

try:
from pydantic.main import BaseModel as PydanticBaseModel
except ImportError:
PydanticBaseModel = pydantic_base_model_type


logger = logging.getLogger(__name__)

py_major_version = sys.version_info.major
Expand Down
18 changes: 18 additions & 0 deletions deepdiff/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,24 @@ def _get_nested_obj(obj, elements):
return obj


def _get_nested_obj_and_force(obj, elements):
    """Walk ``elements`` starting at ``obj``, creating missing containers on the way.

    ``elements`` is an iterable of ``(elem, action)`` pairs.

    * ``GET``: the current object is indexed with ``elem``.
      - On ``KeyError`` an empty dict is stored under ``elem`` and traversal
        descends into it.
      - On ``IndexError``, when the current object is a list and ``elem`` is
        an int at or past its end, the list is padded with ``None`` up to
        ``elem``, an empty dict is appended at that index, and traversal
        descends into it. Any other ``IndexError`` is swallowed and traversal
        continues from the same object.
    * ``GETATTR``: the attribute ``elem`` is read with ``getattr``; errors
      from that lookup are not caught here.
    * Any other action leaves the current object unchanged.

    Returns the object reached after the last element.
    """
    node = obj
    for key, kind in elements:
        is_item_access = kind == GET
        if not is_item_access:
            if kind == GETATTR:
                node = getattr(node, key)
            continue

        try:
            node = node[key]
        except KeyError:
            # Missing key: force an empty dict into place and step into it.
            node[key] = {}
            node = node[key]
        except IndexError:
            growable = (
                isinstance(node, list)
                and isinstance(key, int)
                and key >= len(node)
            )
            if growable:
                # Pad so that the freshly appended dict lands at index ``key``.
                missing = key - len(node)
                node.extend([None] * missing)
                node.append({})
                node = node[-1]
    return node


def extract(obj, path):
"""
Get the item from obj based on path.
Expand Down
27 changes: 17 additions & 10 deletions deepdiff/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
except ImportError: # pragma: no cover.
yaml = None # pragma: no cover.
try:
import toml
if sys.version_info >= (3, 11):
import tomllib as tomli
else:
import tomli
except ImportError: # pragma: no cover.
tomli = None # pragma: no cover.
try:
import tomli_w
except ImportError: # pragma: no cover.
toml = None # pragma: no cover.
tomli_w = None # pragma: no cover.
try:
import clevercsv
csv = None
Expand Down Expand Up @@ -430,10 +437,10 @@ def load_path_content(path, file_type=None):
with open(path, 'r') as the_file:
content = yaml.safe_load(the_file)
elif file_type == 'toml':
if toml is None: # pragma: no cover.
raise ImportError('Toml needs to be installed.') # pragma: no cover.
with open(path, 'r') as the_file:
content = toml.load(the_file)
if tomli is None: # pragma: no cover.
raise ImportError('On python<=3.10 tomli needs to be installed.') # pragma: no cover.
with open(path, 'rb') as the_file:
content = tomli.load(the_file)
elif file_type == 'pickle':
with open(path, 'rb') as the_file:
content = the_file.read()
Expand Down Expand Up @@ -495,10 +502,10 @@ def _save_content(content, path, file_type, keep_backup=True):
with open(path, 'w') as the_file:
content = yaml.safe_dump(content, stream=the_file)
elif file_type == 'toml':
if toml is None: # pragma: no cover.
raise ImportError('Toml needs to be installed.') # pragma: no cover.
with open(path, 'w') as the_file:
content = toml.dump(content, the_file)
if tomli_w is None: # pragma: no cover.
raise ImportError('Tomli-w needs to be installed.') # pragma: no cover.
with open(path, 'wb') as the_file:
content = tomli_w.dump(content, the_file)
elif file_type == 'pickle':
with open(path, 'wb') as the_file:
content = pickle_dump(content, file_obj=the_file)
Expand Down
47 changes: 47 additions & 0 deletions docs/delta.rst
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,50 @@ Expected the old value for root[0] to be 1 but it is 3. Error found on: while ch
[2]

And if you had set raise_errors=True, then it would have raised the error in addition to logging it.


.. _delta_force_label:

Delta Force
-----------

force : Boolean, default=False
force is used to force-apply a delta to an object whose structure is very different from the object the delta was created from.


>>> from deepdiff import DeepDiff, Delta
>>> t1 = {
... 'x': {
... 'y': [1, 2, 3]
... },
... 'q': {
... 'r': 'abc',
... }
... }
>>>
>>> t2 = {
... 'x': {
... 'y': [1, 2, 3, 4]
... },
... 'q': {
... 'r': 'abc',
... 't': 0.5,
... }
... }
>>>
>>> diff = DeepDiff(t1, t2)
>>> diff
{'dictionary_item_added': [root['q']['t']], 'iterable_item_added': {"root['x']['y'][3]": 4}}
>>> delta = Delta(diff)
>>> {} + delta
Unable to get the item at root['x']['y'][3]: 'x'
Unable to get the item at root['q']['t']
{}

# Once we set the force to be True

>>> delta = Delta(diff, force=True)
>>> {} + delta
{'x': {'y': {3: 4}}, 'q': {'t': 0.5}}

Notice that the force attribute does not know the original object at ['x']['y'] was supposed to be a list, so it assumes it was a dictionary.
5 changes: 5 additions & 0 deletions requirements-dev-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ pytest==7.1.2
python-dotenv==0.20.0
python-dateutil==2.8.2
wheel==0.38.1
tomli==2.0.0
tomli-w==1.0.0
pydantic==1.10.8
python_dateutil==2.8.2
tomli_w==1.0.0
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ flake8==6.0.0
python-dateutil==2.8.2
orjson==3.8.3
wheel==0.38.1
tomli==2.0.0
tomli-w==1.0.0
pydantic==1.10.8
Loading

0 comments on commit d52b2da

Please sign in to comment.