Commit

fixes #408 and #298, which reported a problem with pickling DeepDiff for multiprocessing due to lambda functions.
seperman committed Sep 14, 2023
1 parent 450634a commit 2f62074
Showing 2 changed files with 87 additions and 5 deletions.
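Background for the changes below: ProcessPoolExecutor and the multiprocessing module use the standard-library pickle module to move callables, arguments, and results between processes, and pickle cannot serialize lambda functions. Any object that keeps a lambda as an attribute therefore fails to cross a process boundary. The following is a minimal stand-alone sketch of that failure mode using a hypothetical Differ class, not DeepDiff's actual internals:

import pickle

class LambdaDiffer:
    """Stand-in for an object that falls back to a lambda attribute."""
    def __init__(self, ignore_order=False, ignore_order_func=None):
        # Storing a lambda makes every instance unpicklable.
        self.ignore_order_func = ignore_order_func or (lambda level: ignore_order)

class PicklableDiffer:
    """Same idea, but the fallback decision is deferred to call time."""
    def __init__(self, ignore_order=False, ignore_order_func=None):
        self.ignore_order = ignore_order
        self.ignore_order_func = ignore_order_func  # may stay None

try:
    pickle.dumps(LambdaDiffer())
except Exception as err:  # PicklingError or AttributeError depending on the Python version
    print("lambda attribute breaks pickling:", err)

print("picklable version:", len(pickle.dumps(PicklableDiffer())), "bytes")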
12 changes: 7 additions & 5 deletions deepdiff/diff.py
@@ -181,7 +181,7 @@ def __init__(self,
         self.custom_operators = custom_operators or []
         self.ignore_order = ignore_order
 
-        self.ignore_order_func = ignore_order_func or (lambda *_args, **_kwargs: ignore_order)
+        self.ignore_order_func = ignore_order_func
 
         ignore_type_in_groups = ignore_type_in_groups or []
         if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups:
@@ -649,7 +649,7 @@ def _iterables_subscriptable(t1, t2):
 
     def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None, local_tree=None):
         """Difference of iterables"""
-        if self.ignore_order_func(level):
+        if (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order:
             self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
         else:
             self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
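With the rewritten condition above, the callback is optional: when ignore_order_func is given it decides per level, and ignore_order alone covers the case where no callback is set. A usage sketch based on that condition follows; the "tags" key and the expected split in behavior are illustrative assumptions, and a named module-level function is used instead of a lambda so the arguments stay picklable if the call is ever dispatched to another process:

from deepdiff import DeepDiff

t1 = {"tags": [1, 2, 3], "steps": [1, 2, 3]}
t2 = {"tags": [3, 2, 1], "steps": [3, 2, 1]}

def ignore_order_for_tags(level):
    # Ignore ordering only for iterables that live under the "tags" key.
    return "tags" in level.path()

diff = DeepDiff(t1, t2, ignore_order_func=ignore_order_for_tags)
# Per the check above, the reordering under "steps" should still be reported,
# while the reordering under "tags" should not.
print(diff)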
@@ -1103,7 +1103,9 @@ def _get_most_in_common_pairs_in_iterables(
         # And the objects with the same distances are grouped together in an ordered set.
         # It also includes a "max" key that is just the value of the biggest current distance in the
         # most_in_common_pairs dictionary.
-        most_in_common_pairs = defaultdict(lambda: defaultdict(OrderedSetPlus))
+        def defaultdict_orderedset():
+            return defaultdict(OrderedSetPlus)
+        most_in_common_pairs = defaultdict(defaultdict_orderedset)
         pairs = dict_()
 
         pre_calced_distances = None
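For reference, the reason the lambda is removed here: pickle serializes a plain function as a reference to a name it can import again, and a lambda used as defaultdict's default_factory has no such name, so the dictionary (and anything holding it) becomes unpicklable. A stand-alone sketch of the difference, using set in place of DeepDiff's internal OrderedSetPlus:

import pickle
from collections import defaultdict

def defaultdict_set():
    # Named, module-level factory: pickle stores a reference to this name.
    return defaultdict(set)

picklable = defaultdict(defaultdict_set)
unpicklable = defaultdict(lambda: defaultdict(set))

print("named factory pickles to", len(pickle.dumps(picklable)), "bytes")

try:
    pickle.dumps(unpicklable)
except (pickle.PicklingError, AttributeError) as err:  # exact error type varies by Python version
    print("lambda factory cannot be pickled:", err)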
@@ -1390,7 +1392,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None):
             # which means numpy module needs to be available. So np can't be None.
             raise ImportError(CANT_FIND_NUMPY_MSG) # pragma: no cover
 
-        if not self.ignore_order_func(level):
+        if (self.ignore_order_func and not self.ignore_order_func(level)) or not self.ignore_order:
             # fast checks
             if self.significant_digits is None:
                 if np.array_equal(level.t1, level.t2, equal_nan=self.ignore_nan_inequality):
@@ -1416,7 +1418,7 @@ def _diff_numpy_array(self, level, parents_ids=frozenset(), local_tree=None):
         dimensions = len(shape)
         if dimensions == 1:
             self._diff_iterable(level, parents_ids, _original_type=_original_type, local_tree=local_tree)
-        elif self.ignore_order_func(level):
+        elif (self.ignore_order_func and self.ignore_order_func(level)) or self.ignore_order:
             # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier.
             # They will be converted back to Numpy at their final dimension.
             level.t1 = level.t1.tolist()
80 changes: 80 additions & 0 deletions tests/test_diff_other.py
@@ -2,10 +2,27 @@
 import datetime
 from time import sleep
 from unittest import mock
+from functools import partial
+from collections import namedtuple
+from deepdiff import DeepHash
+from deepdiff.helper import pypy3
 from deepdiff.model import DiffLevel
 from deepdiff.diff import (
     DeepDiff, PROGRESS_MSG, INVALID_VIEW_MSG, VERBOSE_LEVEL_RANGE_MSG,
     PURGE_LEVEL_RANGE_MSG)
+from concurrent.futures.process import ProcessPoolExecutor
+from concurrent.futures import as_completed
+
+# Only the prep part of DeepHash. We don't need to test the actual hash function.
+DeepHashPrep = partial(DeepHash, apply_hash=False)
+
+
+def prep_str(obj, ignore_string_type_changes=True):
+    return obj if ignore_string_type_changes else 'str:{}'.format(obj)
+
+
+Point = namedtuple('Point', ["x"])
+point_obj = Point(x=11)
 
 
 class SlowDiffLevel(DiffLevel):
@@ -120,3 +137,66 @@ def test_bool_str2(self):
     def test_get_distance_cache_key(self):
         result = DeepDiff._get_distance_cache_key(added_hash=5, removed_hash=20)
         assert b'0x14--0x5dc' == result
+
+    def test_multi_processing1(self):
+
+        t1 = [[1, 2, 3, 9], [1, 2, 4, 10]]
+        t2 = [[1, 2, 4, 10], [1, 2, 3, 10]]
+
+        futures = []
+        expected_result = {
+            'values_changed': {
+                'root[0][2]': {
+                    'new_value': 4,
+                    'old_value': 3
+                },
+                'root[0][3]': {
+                    'new_value': 10,
+                    'old_value': 9
+                },
+                'root[1][2]': {
+                    'new_value': 3,
+                    'old_value': 4
+                }
+            }
+        }
+
+        with ProcessPoolExecutor(max_workers=1) as executor:
+            futures.append(executor.submit(DeepDiff, t1, t2))
+
+            for future in as_completed(futures, timeout=10):
+                assert not future._exception
+                assert expected_result == future._result
+
+    def test_multi_processing2_with_ignore_order(self):
+
+        t1 = [[1, 2, 3, 9], [1, 2, 4, 10]]
+        t2 = [[1, 2, 4, 10], [1, 2, 3, 10]]
+
+        futures = []
+        expected_result = {'values_changed': {'root[0][3]': {'new_value': 10, 'old_value': 9}}}
+
+        with ProcessPoolExecutor(max_workers=1) as executor:
+            futures.append(executor.submit(DeepDiff, t1, t2, ignore_order=True))
+
+            for future in as_completed(futures, timeout=10):
+                assert not future._exception
+                assert expected_result == future._result
+
+    @pytest.mark.skipif(pypy3, reason="pypy3 expected results are different")
+    def test_multi_processing3_deephash(self):
+        x = "x"
+        x_prep = prep_str(x)
+        expected_result = {
+            x: x_prep,
+            point_obj: "ntPoint:{%s:int:11}" % x,
+            11: 'int:11',
+        }
+
+        futures = []
+        with ProcessPoolExecutor(max_workers=1) as executor:
+            futures.append(executor.submit(DeepHashPrep, point_obj, ignore_string_type_changes=True))
+
+            for future in as_completed(futures, timeout=10):
+                assert not future._exception
+                assert expected_result == future._result
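A usage note beyond the tests above: application code would normally collect these values with the public Future.result() API rather than the private _exception/_result attributes; result() returns the value or re-raises whatever the worker process raised (including pickling errors). A minimal sketch, assuming a deepdiff version that includes this fix:

from concurrent.futures import ProcessPoolExecutor
from deepdiff import DeepDiff

if __name__ == "__main__":  # required for spawn-based platforms such as Windows and macOS
    with ProcessPoolExecutor(max_workers=1) as executor:
        future = executor.submit(DeepDiff, [1, 2, 3, 9], [1, 2, 4, 10], ignore_order=True)
        print(future.result(timeout=10))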
