diff --git a/AUTHORS.md b/AUTHORS.md index 4eeb0a88..18e340ef 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -35,3 +35,4 @@ Authors in order of the contributions: - Florian Klien [flowolf](https://github.com/flowolf) for adding math_epsilon - Tim Klein [timjklein36](https://github.com/timjklein36) for retaining the order of multiple dictionary items added via Delta. - Wilhelm Schürmann[wbsch](https://github.com/wbsch) for fixing the typo with yml files. +- [lyz-code](https://github.com/lyz-code) for adding support for regular expressions. diff --git a/CHANGELOG.md b/CHANGELOG.md index 5203babc..582073d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # DeepDiff Change log +- v5-3-0: add support for regular expressions - v5-2-3: Retaining the order of multiple dictionary items added via Delta. Fixed the typo with yml files in deep cli. Fixing Grep RecursionError where using non UTF-8 character. Allowing kwargs to be passed to to_json method. - v5-2-2: Fixed Delta serialization when None type is present. - v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType. diff --git a/LICENSE b/LICENSE index 424242c7..e09c3d78 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2014 - 2020 Sep Dehpour (Seperman) and contributors +Copyright (c) 2014 - 2021 Sep Dehpour (Seperman) and contributors www.zepworks.com Permission is hereby granted, free of charge, to any person obtaining a copy diff --git a/README.md b/README.md index 9bd2956c..9bf7de2b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DeepDiff v 5.2.3 +# DeepDiff v 5.3.0 ![Downloads](https://img.shields.io/pypi/dm/deepdiff.svg?style=flat) ![Python Versions](https://img.shields.io/pypi/pyversions/deepdiff.svg?style=flat) @@ -18,8 +18,20 @@ Tested on Python 3.6+ and PyPy3. 
**NOTE: The last version of DeepDiff to work on Python 3.5 was DeepDiff 5-0-2** -- [Documentation](https://zepworks.com/deepdiff/5.2.3/) +- [Documentation](https://zepworks.com/deepdiff/5.3.0/) +## What is new? + +DeepDiff 5.3.0 adds regular expression support to the DeepSearch and grep modules: + +```python +>>> from deepdiff import grep +>>> from pprint import pprint +>>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] +>>> ds = obj | grep("some.*", use_regexp=True) +{ 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], + 'matched_values': ['root[0]', "root[1]['long']"]} +``` ## Installation @@ -54,13 +66,13 @@ Note: if you want to use DeepDiff via commandline, make sure to run `pip install DeepDiff gets the difference of 2 objects. -> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.3/diff.html) -> - The full documentation of all modules can be found on <https://zepworks.com/deepdiff/5.2.3/> +> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.3.0/diff.html) +> - The full documentation of all modules can be found on <https://zepworks.com/deepdiff/5.3.0/> > - Tutorials and posts about DeepDiff can be found on ## A few Examples -> Note: This is just a brief overview of what DeepDiff can do. Please visit <https://zepworks.com/deepdiff/5.2.3/> for full documentation. +> Note: This is just a brief overview of what DeepDiff can do. Please visit <https://zepworks.com/deepdiff/5.3.0/> for full documentation. 
### List difference ignoring order or duplicates @@ -264,8 +276,8 @@ Example: ``` -> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.2.3/diff.html) -> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.3/> +> - Please take a look at the [DeepDiff docs](https://zepworks.com/deepdiff/5.3.0/diff.html) +> - The full documentation can be found on <https://zepworks.com/deepdiff/5.3.0/> # Deep Search @@ -297,8 +309,8 @@ And you can pass all the same kwargs as DeepSearch to grep too: {'matched_paths': {"root['somewhere']": 'around'}, 'matched_values': {"root['long']": 'somewhere'}} ``` -> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.2.3/dsearch.html) -> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.3/> +> - Please take a look at the [DeepSearch docs](https://zepworks.com/deepdiff/5.3.0/dsearch.html) +> - The full documentation can be found on <https://zepworks.com/deepdiff/5.3.0/> # Deep Hash (New in v4-0-0) @@ -306,8 +318,8 @@ And you can pass all the same kwargs as DeepSearch to grep too: DeepHash is designed to give you hash of ANY python object based on its contents even if the object is not considered hashable! DeepHash is supposed to be deterministic in order to make sure 2 objects that contain the same data, produce the same hash. -> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.3/deephash.html) -> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.3/> +> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.3.0/deephash.html) +> - The full documentation can be found on <https://zepworks.com/deepdiff/5.3.0/> Let's say you have a dictionary object. @@ -355,8 +367,8 @@ Which you can write as: At first it might seem weird why DeepHash(obj)[obj] but remember that DeepHash(obj) is a dictionary of hashes of all other objects that obj contains too. 
-> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.2.3/deephash.html) -> - The full documentation can be found on <https://zepworks.com/deepdiff/5.2.3/> +> - Please take a look at the [DeepHash docs](https://zepworks.com/deepdiff/5.3.0/deephash.html) +> - The full documentation can be found on <https://zepworks.com/deepdiff/5.3.0/> # Using DeepDiff in unit tests diff --git a/deepdiff/__init__.py b/deepdiff/__init__.py index fcde3bc6..fe20a6ad 100644 --- a/deepdiff/__init__.py +++ b/deepdiff/__init__.py @@ -1,6 +1,6 @@ """This module offers the DeepDiff, DeepSearch, grep, Delta and DeepHash classes.""" # flake8: noqa -__version__ = '5.2.3' +__version__ = '5.3.0' import logging if __name__ == '__main__': diff --git a/deepdiff/search.py b/deepdiff/search.py index 4226a6a4..799db9e7 100644 --- a/deepdiff/search.py +++ b/deepdiff/search.py @@ -4,7 +4,9 @@ from deepdiff.helper import OrderedSetPlus import logging -from deepdiff.helper import strings, numbers, add_to_frozen_set, get_doc, dict_ +from deepdiff.helper import ( + strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE +) logger = logging.getLogger(__name__) @@ -40,6 +42,8 @@ class DeepSearch(dict): If True, the value of the object or its children have to exactly match the item. If False, the value of the item can be a part of the value of the object or its children + use_regexp: Boolean, default = False + **Returns** A DeepSearch object that has the matched paths and matched values. 
@@ -83,6 +87,7 @@ def __init__(self, verbose_level=1, case_sensitive=False, match_string=False, + use_regexp=False, **kwargs): if kwargs: raise ValueError(( @@ -104,6 +109,9 @@ def __init__(self, matched_paths=self.__set_or_dict(), matched_values=self.__set_or_dict(), unprocessed=[]) + self.use_regexp = use_regexp + if self.use_regexp: + item = re.compile(item) # Cases where user wants to match exact string item self.match_string = match_string @@ -135,7 +143,7 @@ def __search_obj(self, if obj == item: found = True # We report the match but also continue inside the match to see if there are - # furthur matches inside the `looped` object. + # further matches inside the `looped` object. self.__report(report_key='matched_values', key=parent, value=obj) try: @@ -205,7 +213,8 @@ def __search_dict(self, str_item = str(item) if (self.match_string and str_item == new_parent_cased) or\ - (not self.match_string and str_item in new_parent_cased): + (not self.match_string and str_item in new_parent_cased) or\ + (self.use_regexp and item.search(new_parent_cased)): self.__report( report_key='matched_paths', key=new_parent, @@ -233,7 +242,7 @@ def __search_iterable(self, else: thing_cased = thing.lower() - if thing_cased == item: + if not self.use_regexp and thing_cased == item: self.__report( report_key='matched_values', key=new_parent, value=thing) else: @@ -248,7 +257,12 @@ def __search_str(self, obj, item, parent): """Compare strings""" obj_text = obj if self.case_sensitive else obj.lower() - if (self.match_string and item == obj_text) or (not self.match_string and item in obj_text): + is_matched = False + if self.use_regexp: + is_matched = item.search(obj_text) + elif (self.match_string and item == obj_text) or (not self.match_string and item in obj_text): + is_matched = True + if is_matched: self.__report(report_key='matched_values', key=parent, value=obj) def __search_numbers(self, obj, item, parent): @@ -270,11 +284,10 @@ def __search_tuple(self, obj, item, parent, 
parents_ids): def __search(self, obj, item, parent="root", parents_ids=frozenset()): """The main search method""" - # import pytest; pytest.set_trace() if self.__skip_this(item, parent): return - elif isinstance(obj, strings) and isinstance(item, strings): + elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)): self.__search_str(obj, item, parent) elif isinstance(obj, strings) and isinstance(item, numbers): diff --git a/docs/authors.rst b/docs/authors.rst index ee86f776..8eb0c36c 100644 --- a/docs/authors.rst +++ b/docs/authors.rst @@ -42,6 +42,8 @@ Thanks to the following people for their contributions: - Florian Klien `flowolf`_ for adding math_epsilon - Tim Klein `timjklein36`_ for retaining the order of multiple dictionary items added via Delta - Wilhelm Schürmann `wbsch`_ for fixing the typo with yml files. +- `lyz_code`_ for adding support for regular expressions. + .. _Sep Dehpour (Seperman): http://www.zepworks.com .. _Victor Hahn Castell: http://hahncastell.de @@ -74,6 +76,7 @@ Thanks to the following people for their contributions: .. _flowolf: https://github.com/flowolf .. _timjklein36: https://github.com/timjklein36 .. _wbsch: https://github.com/wbsch +.. _lyz_code: https://github.com/lyz-code Back to :doc:`/index` diff --git a/docs/changelog.rst b/docs/changelog.rst index ba6274de..0a80a639 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -5,6 +5,7 @@ Changelog DeepDiff Changelog +- v5-3-0: add support for regular expressions - v5-2-3: Retaining the order of multiple dictionary items added via Delta. Fixed the typo with yml files in deep cli. Fixing Grep RecursionError where using non UTF-8 character. Allowing kwargs to be passed to to_json method. - v5-2-2: Fixed Delta serialization when None type is present. - v5-2-0: Removed Murmur3 as the preferred hashing method. Using SHA256 by default now. Added commandline for deepdiff. Added group_by. Added math_epsilon. Improved ignoring of NoneType. 
diff --git a/docs/conf.py b/docs/conf.py index 9457fbd7..88153f38 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,9 +60,9 @@ # built documents. # # The short X.Y version. -version = '5.2.3' +version = '5.3.0' # The full version, including alpha/beta/rc tags. -release = '5.2.3' +release = '5.3.0' load_dotenv(override=True) DOC_VERSION = os.environ.get('DOC_VERSION', version) diff --git a/docs/index.rst b/docs/index.rst index 57b4277e..4378a786 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -4,7 +4,7 @@ contain the root `toctree` directive. -DeepDiff 5.2.3 documentation! +DeepDiff 5.3.0 documentation! ============================= ***************** @@ -35,6 +35,23 @@ DeepDiff is rigorously tested against Python 3.6, 3.7, 3.8, 3.9 and Pypy3 NOTE: Python 2 is not supported any more. DeepDiff v3.3.0 was the last version to supprt Python 2. +*********** +What is New +*********** + +DeepDiff 5.3.0 adds regular expression support to the DeepSearch and grep modules: + + +.. code:: python + + >>> from deepdiff import grep + >>> from pprint import pprint + >>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] + >>> ds = obj | grep("some.*", use_regexp=True) + { 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], + 'matched_values': ['root[0]', "root[1]['long']"]} + + ********* Tutorials ********* diff --git a/docs/search_doc.rst b/docs/search_doc.rst index 1f4117b7..cd8da261 100644 --- a/docs/search_doc.rst +++ b/docs/search_doc.rst @@ -22,3 +22,9 @@ Search in nested data for string { 'matched_paths': {"root[1]['somewhere']": 'around'}, 'matched_values': { 'root[0]': 'something somewhere', "root[1]['long']": 'somewhere'}} + +You can also use regular expressions: + >>> obj = ["something here", {"long": "somewhere", "someone": 2, 0: 0, "somewhere": "around"}] + >>> ds = obj | grep("some.*", use_regexp=True) + { 'matched_paths': ["root[1]['someone']", "root[1]['somewhere']"], + 'matched_values': ['root[0]', 
"root[1]['long']"]} diff --git a/requirements-cli.txt b/requirements-cli.txt index 16e2df7e..7518df0d 100644 --- a/requirements-cli.txt +++ b/requirements-cli.txt @@ -1,4 +1,4 @@ click==7.1.2 -pyyaml==5.3.1 +pyyaml==5.4 toml==0.10.2 -clevercsv==0.6.6 +clevercsv==0.6.7 diff --git a/setup.cfg b/setup.cfg index 2a0b0cf3..abc1f4a4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 5.2.3 +current_version = 5.3.0 commit = True tag = True tag_name = {new_version} diff --git a/setup.py b/setup.py index 50502c25..3eb90426 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ if os.environ.get('USER', '') == 'vagrant': del os.link -version = '5.2.3' +version = '5.3.0' def get_reqs(filename): diff --git a/tests/test_search.py b/tests/test_search.py index 247e648e..b97f15ac 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -336,6 +336,95 @@ class Child(Parent): result = {'matched_values': {'root.a'}} assert DeepSearch(obj, item, verbose_level=1) == result + def test_dont_use_regex_by_default(self): + obj = "long string somewhere" + item = "some.*" + result = {} + assert DeepSearch(obj, item, verbose_level=1) == result + + def test_regex_in_string(self): + obj = "long string somewhere" + item = "some.*" + result = {"matched_values": {"root"}} + assert DeepSearch(obj, item, verbose_level=1, use_regexp=True) == result + + def test_regex_does_not_match_the_regex_string_itself(self): + obj = ["We like python", "but not (?:p|t)ython"] + item = "(?:p|t)ython" + result = {'matched_values': ['root[0]']} + assert DeepSearch(obj, item, verbose_level=1, use_regexp=True) == result + + def test_regex_in_string_in_tuple(self): + obj = ("long", "string", 0, "somewhere") + item = "some.*" + result = {"matched_values": {"root[3]"}} + assert DeepSearch(obj, item, verbose_level=1, use_regexp=True) == result + + def test_regex_in_string_in_list(self): + obj = ["long", "string", 0, "somewhere"] + item = "some.*" + result = {"matched_values": 
{"root[3]"}} + assert DeepSearch(obj, item, verbose_level=1, use_regexp=True) == result + + def test_regex_in_string_in_dictionary(self): + obj = {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"} + result = { + "matched_paths": {"root['somewhere']"}, + "matched_values": {"root['long']"}, + } + item = "some.*" + ds = DeepSearch(obj, item, verbose_level=1, use_regexp=True) + assert ds == result + + def test_regex_in_string_in_dictionary_in_list_verbose(self): + obj = [ + "something somewhere", + {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}, + ] + result = { + "matched_paths": {"root[1]['somewhere']": "around"}, + "matched_values": { + "root[1]['long']": "somewhere", + "root[0]": "something somewhere", + }, + } + item = "some.*" + ds = DeepSearch(obj, item, verbose_level=2, use_regexp=True) + assert ds == result + + def test_regex_in_custom_object(self): + obj = CustomClass("here, something", "somewhere") + result = {"matched_values": {"root.b"}} + item = "somew.*" + ds = DeepSearch(obj, item, verbose_level=1, use_regexp=True) + assert ds == result + + def test_regex_in_custom_object_in_dictionary_verbose(self): + obj = {1: CustomClass("here, something", "somewhere out there")} + result = {"matched_values": {"root[1].b": "somewhere out there"}} + item = "somew.*" + ds = DeepSearch(obj, item, verbose_level=2, use_regexp=True) + assert ds == result + + def test_regex_in_named_tuples_verbose(self): + from collections import namedtuple + + Point = namedtuple("Point", ["x", "somewhere_good"]) + obj = Point(x="my keys are somewhere", somewhere_good=22) + item = "some.*" + ds = DeepSearch(obj, item, verbose_level=2, use_regexp=True) + result = { + "matched_values": {"root.x": "my keys are somewhere"}, + "matched_paths": {"root.somewhere_good": 22}, + } + assert ds == result + + def test_regex_in_string_in_set_verbose(self): + obj = {"long", "string", 0, "somewhere"} + item = "some.*" + ds = DeepSearch(obj, item, verbose_level=2, 
use_regexp=True) + assert list(ds["matched_values"].values())[0] == "somewhere" + class TestGrep: @@ -365,3 +454,9 @@ def test_grep_with_non_utf8_chars(self): item = {"z": "z"} result = obj | grep(item) assert {} == result + + def test_grep_regex_in_string_in_tuple(self): + obj = ("long", "string", 0, "somewhere") + item = "some.*" + result = {"matched_values": {"root[3]"}} + assert obj | grep(item, verbose_level=1, use_regexp=True) == result