From 9c7d70e2adf992fc74a2bf0c0fb014b47db8a5b2 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Fri, 11 Oct 2024 13:34:35 +0200 Subject: [PATCH] fix extract_paths --- sisyphus/hash.py | 21 +++++++++++++++------ sisyphus/tools.py | 4 ++++ tests/hash_unittest.py | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/sisyphus/hash.py b/sisyphus/hash.py index 0b258c0..f9da9a8 100644 --- a/sisyphus/hash.py +++ b/sisyphus/hash.py @@ -44,6 +44,19 @@ def get_object_state(obj): Comment: Maybe obj.__reduce__() is a better idea? is it stable for hashing? """ + # Note: sis_hash_helper does not call get_object_state in these cases. + # However, other code might (e.g. extract_paths), + # so we keep consistent to the behavior of sis_hash_helper. + if obj is None: + return None + if isinstance(obj, (bool, int, float, complex, str)): + return obj + if isfunction(obj) or isclass(obj): + # Note: sis_hash_helper does not call get_object_state in these cases. + # However, other code might (e.g. extract_paths), + # so we keep consistent to the behavior of sis_hash_helper. + return obj.__module__, obj.__qualname__ + if isinstance(obj, pathlib.PurePath): # pathlib paths have a somewhat technical internal state # ('_drv', '_root', '_parts', '_str', '_hash', '_pparts', '_cached_cparts'), @@ -58,18 +71,14 @@ def get_object_state(obj): else: args = None - if hasattr(obj, "__sis_state__") and not isinstance(obj, type): + if hasattr(obj, "__sis_state__"): state = obj.__sis_state__() # Note: Since Python 3.11, there is a default object.__getstate__. # However, this default object.__getstate__ is not correct for some native types, e.g. _functools.partial. # https://github.com/rwth-i6/sisyphus/issues/207 # https://github.com/python/cpython/issues/125094 # Thus, only use __getstate__ if it is not the default object.__getstate__. - elif ( - hasattr(obj, "__getstate__") - and obj.__class__.__getstate__ is not getattr(object, "__getstate__", None) - and not isinstance(obj, type) - ): + elif hasattr(obj, "__getstate__") and obj.__class__.__getstate__ is not getattr(object, "__getstate__", None): state = obj.__getstate__() else: state = _getmembers(obj) diff --git a/sisyphus/tools.py b/sisyphus/tools.py index 30ba22b..e4a8026 100644 --- a/sisyphus/tools.py +++ b/sisyphus/tools.py @@ -85,6 +85,10 @@ def extract_paths(args: Any) -> Set: if id(obj) in visited_obj_ids: continue visited_obj_ids[id(obj)] = obj + if obj is None: + continue + if isinstance(obj, (bool, int, float, complex, str)): + continue if isinstance(obj, Block) or isinstance(obj, enum.Enum): continue if hasattr(obj, "_sis_path") and obj._sis_path is True and not type(obj) is type: diff --git a/tests/hash_unittest.py b/tests/hash_unittest.py index ad804f2..b80a508 100644 --- a/tests/hash_unittest.py +++ b/tests/hash_unittest.py @@ -23,6 +23,22 @@ def d(): self.assertEqual(sis_hash_helper(b), b"(function, (tuple, (str, '" + __name__.encode() + b"'), (str, 'b')))") self.assertRaises(AssertionError, sis_hash_helper, c) + def test_get_object_state_cls(self): + # Note: the hash of a class currently does not depend on get_object_state, + # but there is special logic in sis_hash_helper for classes, + # thus it doesn't really matter for the hash what is being returned here. + # However, this is used by extract_paths, so we test it here. + s = get_object_state(str) + self.assertEqual(s, ("builtins", "str")) + + def test_get_object_state_function(self): + # Note: the hash of a function currently does not depend on get_object_state, + # but there is special logic in sis_hash_helper for functions, + # thus it doesn't really matter for the hash what is being returned here. + # However, this is used by extract_paths, so we test it here. + s = get_object_state(b) + self.assertEqual(s, (b.__module__, b.__name__)) + def test_enum(self): self.assertEqual( sis_hash_helper(MyEnum.Entry1),