From d07956268680d1b53ac96372d59e4946ae8d6592 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 7 Oct 2024 11:58:21 -0400 Subject: [PATCH] fix[tool]: update VarAccess pickle implementation (#4270) fix a bug where unpickling `annotated_vyper_module` would lead to a crash: ``` AttributeError: 'VarAccess' object has no attribute 'variable' ``` this is a blocker for tooling, for instance, titanoboa relies on pickle/unpickle to cache `CompilerData` objects: https://github.com/vyperlang/titanoboa/blob/86df8936654db2068641/boa/util/disk_cache.py#L65-L66 the underlying issue is that `pickle.loads()` calls `obj.__hash__()` for objects that are keys in a hashed data structure - namely, dicts, sets and frozensets. this causes a crash when there is a cycle in the object graph, because the object is not fully instantiated at the time that `__hash__()` is called. this is a cpython issue, reported at python/cpython#124937. @serhiy-storchaka suggested the approach taken in this PR, which breaks the loop before pickling: https://github.com/python/cpython/issues/124937#issuecomment-2392227290 note that the implementation of `__reduce__()` in this PR is safe, since there is no cycle in the hash function itself, since the recursion breaks in `VarInfo.__hash__()`. in other words, there is no possibility of `VarAccess.__hash__()` changing mid-way through reconstructing the object. --- tests/integration/test_pickle_ast.py | 19 +++++++++++++++++++ vyper/semantics/analysis/base.py | 14 ++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_pickle_ast.py diff --git a/tests/integration/test_pickle_ast.py b/tests/integration/test_pickle_ast.py new file mode 100644 index 0000000000..2c6144603a --- /dev/null +++ b/tests/integration/test_pickle_ast.py @@ -0,0 +1,19 @@ +import copy +import pickle + +from vyper.compiler.phases import CompilerData + + +def test_pickle_ast(): + code = """ +@external +def foo(): + self.bar() + y: uint256 = 5 + x: uint256 = 5 +def bar(): + pass + """ + f = CompilerData(code) + copy.deepcopy(f.annotated_vyper_module) + pickle.loads(pickle.dumps(f.annotated_vyper_module)) diff --git a/vyper/semantics/analysis/base.py b/vyper/semantics/analysis/base.py index 65bc8df3ab..982b6eb01d 100644 --- a/vyper/semantics/analysis/base.py +++ b/vyper/semantics/analysis/base.py @@ -1,5 +1,5 @@ import enum -from dataclasses import dataclass +from dataclasses import dataclass, fields from functools import cached_property from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Union @@ -234,6 +234,17 @@ class VarAccess: # A sentinel indicating a subscript access SUBSCRIPT_ACCESS: ClassVar[Any] = object() + # custom __reduce__ and _produce implementations to work around + # a pickle bug. + # see https://github.com/python/cpython/issues/124937#issuecomment-2392227290 + def __reduce__(self): + dict_obj = {f.name: getattr(self, f.name) for f in fields(self)} + return self.__class__._produce, (dict_obj,) + + @classmethod + def _produce(cls, data): + return cls(**data) + @cached_property def attrs(self): ret = [] @@ -286,7 +297,6 @@ def __post_init__(self): for attr in should_match: if getattr(self.var_info, attr) != getattr(self, attr): raise CompilerPanic(f"Bad analysis: non-matching {attr}: {self}") - self._writes: OrderedSet[VarAccess] = OrderedSet() self._reads: OrderedSet[VarAccess] = OrderedSet()