From 69e5c0541a9b23ad1b085e0a89b545124716b516 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Fri, 14 Jun 2024 13:31:45 -0700 Subject: [PATCH 01/12] feat[test]: implement `abi_decode` spec test (#4095) this commit implements a spec-based differential fuzzer for `abi_decode`. it introduces several components: - a "spec" implementation of `abi_decode`, which is how vyper's abi_decode should behave on a given payload, implemented in python - a hypothesis strategy to draw vyper types - hypothesis strategy to create valid data for a given vyper type - a hypothesis strategy to _mutate_ a given payload which is designed to introduce faults in the decoder. testing indicated splicing pointers into the payload - either valid pointers or "nearly" valid pointers - had the highest success rate for finding bugs in the decoder. the intuition here is that the most difficult part of the decoder is validating out-of-bound pointers in the payload, so pointers represent "semantically high-value" data to the fuzzer. - some hypothesis tuning to ensure a good distribution of types over several days of testing+tuning, this fuzzer independently found the bugs fixed in 44bb281ccaa and 21f7172274e (which were originally found by manual review). 
--- tests/conftest.py | 2 +- tests/evm_backends/base_env.py | 12 +- tests/evm_backends/revm_env.py | 2 + .../functional/builtins/codegen/abi_decode.py | 148 +++++++ .../builtins/codegen/test_abi_decode_fuzz.py | 416 ++++++++++++++++++ vyper/codegen/core.py | 12 +- vyper/semantics/types/subscriptable.py | 5 +- 7 files changed, 592 insertions(+), 5 deletions(-) create mode 100644 tests/functional/builtins/codegen/abi_decode.py create mode 100644 tests/functional/builtins/codegen/test_abi_decode_fuzz.py diff --git a/tests/conftest.py b/tests/conftest.py index 4b3d90f65a..31c72246bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -196,7 +196,7 @@ def env(gas_limit, evm_version, evm_backend, tracing, account_keys) -> BaseEnv: ) -@pytest.fixture +@pytest.fixture(scope="module") def get_contract_from_ir(env, optimize): def ir_compiler(ir, *args, **kwargs): ir = IRnode.from_list(ir) diff --git a/tests/evm_backends/base_env.py b/tests/evm_backends/base_env.py index a8ab4d2367..1ea3dba328 100644 --- a/tests/evm_backends/base_env.py +++ b/tests/evm_backends/base_env.py @@ -30,7 +30,7 @@ class ExecutionResult: gas_used: int -class EvmError(RuntimeError): +class EvmError(Exception): """Exception raised when a call fails.""" @@ -205,6 +205,16 @@ def out_of_gas_error(self) -> str: """Expected error message when user runs out of gas""" raise NotImplementedError # must be implemented by subclasses + @property + def contract_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + + @property + def initcode_size_limit_error(self) -> str: + """Expected error message when contract is over codesize limit""" + raise NotImplementedError # must be implemented by subclasses + def _compile( source_code: str, diff --git a/tests/evm_backends/revm_env.py b/tests/evm_backends/revm_env.py index 5c8b8aba08..d5a7570f96 100644 --- a/tests/evm_backends/revm_env.py +++ 
b/tests/evm_backends/revm_env.py @@ -11,6 +11,8 @@ class RevmEnv(BaseEnv): invalid_opcode_error = "InvalidFEOpcode" out_of_gas_error = "OutOfGas" + contract_size_limit_error = "CreateContractSizeLimit" + initcode_size_limit_error = "CreateInitCodeSizeLimit" def __init__( self, diff --git a/tests/functional/builtins/codegen/abi_decode.py b/tests/functional/builtins/codegen/abi_decode.py new file mode 100644 index 0000000000..9e10b862d5 --- /dev/null +++ b/tests/functional/builtins/codegen/abi_decode.py @@ -0,0 +1,148 @@ +from typing import TYPE_CHECKING, Iterable + +from eth_utils import to_checksum_address + +from vyper.abi_types import ( + ABI_Address, + ABI_Bool, + ABI_Bytes, + ABI_BytesM, + ABI_DynamicArray, + ABI_GIntM, + ABI_StaticArray, + ABI_String, + ABI_Tuple, + ABIType, +) +from vyper.utils import int_bounds, unsigned_to_signed + +if TYPE_CHECKING: + from vyper.semantics.types import VyperType + + +class DecodeError(Exception): + pass + + +def _strict_slice(payload, start, length): + if start < 0: + raise DecodeError(f"OOB {start}") + + end = start + length + if end > len(payload): + raise DecodeError(f"OOB {start} + {length} (=={end}) > {len(payload)}") + return payload[start:end] + + +def _read_int(payload, ofst): + return int.from_bytes(_strict_slice(payload, ofst, 32)) + + +# vyper abi_decode spec implementation +def spec_decode(typ: "VyperType", payload: bytes): + abi_t = typ.abi_type + + lo, hi = abi_t.static_size(), abi_t.size_bound() + if not (lo <= len(payload) <= hi): + raise DecodeError(f"bad payload size {lo}, {len(payload)}, {hi}") + + return _decode_r(abi_t, 0, payload) + + +def _decode_r(abi_t: ABIType, current_offset: int, payload: bytes): + if isinstance(abi_t, ABI_Tuple): + return tuple(_decode_multi_r(abi_t.subtyps, current_offset, payload)) + + if isinstance(abi_t, ABI_StaticArray): + n = abi_t.m_elems + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + if isinstance(abi_t, ABI_DynamicArray): 
+ bound = abi_t.elems_bound + + n = _read_int(payload, current_offset) + if n > bound: + raise DecodeError("Dynarray too large") + + # offsets in dynarray start from after the length word + current_offset += 32 + subtypes = [abi_t.subtyp] * n + return _decode_multi_r(subtypes, current_offset, payload) + + # sanity check + assert not abi_t.is_complex_type() + + if isinstance(abi_t, ABI_Bytes): + bound = abi_t.bytes_bound + length = _read_int(payload, current_offset) + if length > bound: + raise DecodeError("bytes too large") + + current_offset += 32 # size of length word + ret = _strict_slice(payload, current_offset, length) + + # abi string doesn't actually define string decoder, so we + # just bytecast the output + if isinstance(abi_t, ABI_String): + # match eth-stdlib, since that's what we check against + ret = ret.decode(errors="surrogateescape") + + return ret + + # sanity check + assert not abi_t.is_dynamic() + + if isinstance(abi_t, ABI_GIntM): + ret = _read_int(payload, current_offset) + + # handle signedness + if abi_t.signed: + ret = unsigned_to_signed(ret, 256, strict=True) + + # bounds check + lo, hi = int_bounds(signed=abi_t.signed, bits=abi_t.m_bits) + if not (lo <= ret <= hi): + u = "" if abi_t.signed else "u" + raise DecodeError(f"invalid {u}int{abi_t.m_bits}") + + if isinstance(abi_t, ABI_Address): + return to_checksum_address(ret.to_bytes(20, "big")) + + if isinstance(abi_t, ABI_Bool): + if ret not in (0, 1): + raise DecodeError("invalid bool") + return ret + + return ret + + if isinstance(abi_t, ABI_BytesM): + ret = _strict_slice(payload, current_offset, 32) + m = abi_t.m_bytes + assert 1 <= m <= 32 # internal sanity check + # BytesM is right-padded with zeroes + if ret[m:] != b"\x00" * (32 - m): + raise DecodeError(f"invalid bytes{m}") + return ret[:m] + + raise RuntimeError("unreachable") + + +def _decode_multi_r(types: Iterable[ABIType], outer_offset: int, payload: bytes) -> list: + ret = [] + static_ofst = outer_offset + + for sub_t in types: 
+ if sub_t.is_dynamic(): + # "head" terminology from abi spec + head = _read_int(payload, static_ofst) + ofst = outer_offset + head + else: + ofst = static_ofst + + item = _decode_r(sub_t, ofst, payload) + + ret.append(item) + static_ofst += sub_t.embedded_static_size() + + return ret diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py new file mode 100644 index 0000000000..d12b2cde7e --- /dev/null +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -0,0 +1,416 @@ +from dataclasses import dataclass + +import hypothesis as hp +import hypothesis.strategies as st +import pytest +from eth.codecs import abi + +from tests.evm_backends.base_env import EvmError +from vyper.codegen.core import calculate_type_for_external_return, needs_external_call_wrap +from vyper.semantics.types import ( + AddressT, + BoolT, + BytesM_T, + BytesT, + DArrayT, + DecimalT, + HashMapT, + IntegerT, + SArrayT, + StringT, + TupleT, + VyperType, + _get_primitive_types, + _get_sequence_types, +) +from vyper.semantics.types.shortcuts import UINT256_T + +from .abi_decode import DecodeError, spec_decode + +pytestmark = pytest.mark.fuzzing + +type_ctors = [] +for t in _get_primitive_types().values(): + if t == HashMapT or t == DecimalT(): + continue + if isinstance(t, VyperType): + t = t.__class__ + if t in type_ctors: + continue + type_ctors.append(t) + +complex_static_ctors = [SArrayT, TupleT] +complex_dynamic_ctors = [DArrayT] +leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] +static_leaf_ctors = [t for t in leaf_ctors if t._is_prim_word] +dynamic_leaf_ctors = [BytesT, StringT] + +MAX_MUTATIONS = 33 + + +@st.composite +# max type nesting +def vyper_type(draw, nesting=3, skip=None): + assert nesting >= 0 + + skip = skip or [] + + st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) + st_complex = st.one_of( + 
st.sampled_from(complex_dynamic_ctors), st.sampled_from(complex_static_ctors) + ) + + if nesting == 0: + st_type = st_leaves + else: + st_type = st.one_of(st_complex, st_leaves) + + # filter here is a bit of a kludge, would be better to improve sampling + t = draw(st_type.filter(lambda t: t not in skip)) + + # note: maybe st.deferred is good here, we could define it with + # mutual recursion + def _go(skip=skip): + return draw(vyper_type(nesting=nesting - 1, skip=skip)) + + if t in (BytesT, StringT): + # arbitrary max_value + bound = draw(st.integers(min_value=1, max_value=1024)) + return t(bound) + + if t == SArrayT: + subtype = _go(skip=[TupleT, BytesT, StringT]) + bound = draw(st.integers(min_value=1, max_value=6)) + return t(subtype, bound) + if t == DArrayT: + subtype = _go(skip=[TupleT]) + bound = draw(st.integers(min_value=1, max_value=16)) + return t(subtype, bound) + + if t == TupleT: + # zero-length tuples are not allowed in vyper + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = [_go() for _ in range(n)] + return TupleT(subtypes) + + if t in (BoolT, AddressT): + return t() + + if t == IntegerT: + signed = draw(st.booleans()) + bits = 8 * draw(st.integers(min_value=1, max_value=32)) + return t(signed, bits) + + if t == BytesM_T: + m = draw(st.integers(min_value=1, max_value=32)) + return t(m) + + raise RuntimeError("unreachable") + + +@st.composite +def data_for_type(draw, typ): + def _go(t): + return draw(data_for_type(t)) + + if isinstance(typ, TupleT): + return tuple(_go(item_t) for item_t in typ.member_types) + + if isinstance(typ, SArrayT): + return [_go(typ.value_type) for _ in range(typ.length)] + + if isinstance(typ, DArrayT): + n = draw(st.integers(min_value=0, max_value=typ.length)) + return [_go(typ.value_type) for _ in range(n)] + + if isinstance(typ, StringT): + # technically the ABI spec doesn't say string has to be valid utf-8, + # but eth-stdlib won't encode invalid utf-8 + return draw(st.text(max_size=typ.length)) + + if 
isinstance(typ, BytesT): + return draw(st.binary(max_size=typ.length)) + + if isinstance(typ, IntegerT): + lo, hi = typ.ast_bounds + return draw(st.integers(min_value=lo, max_value=hi)) + + if isinstance(typ, BytesM_T): + return draw(st.binary(min_size=typ.length, max_size=typ.length)) + + if isinstance(typ, BoolT): + return draw(st.booleans()) + + if isinstance(typ, AddressT): + ret = draw(st.binary(min_size=20, max_size=20)) + return "0x" + ret.hex() + + raise RuntimeError("unreachable") + + +def _sort2(x, y): + if x > y: + return y, x + return x, y + + +@st.composite +def _mutate(draw, payload, max_mutations=MAX_MUTATIONS): + # do point+bulk mutations, + # add/edit/delete/splice/flip up to max_mutations. + if len(payload) == 0: + return + + ret = bytearray(payload) + + # for add/edit, the new byte is any character, but we bias it towards + # bytes already in the payload. + st_any_byte = st.integers(min_value=0, max_value=255) + payload_nonzeroes = list(x for x in payload if x != 0) + if len(payload_nonzeroes) > 0: + st_existing_byte = st.sampled_from(payload) + st_byte = st.one_of(st_existing_byte, st_any_byte) + else: + st_byte = st_any_byte + + # add, edit, delete, word, splice, flip + possible_actions = "adwww" + actions = draw(st.lists(st.sampled_from(possible_actions), max_size=MAX_MUTATIONS)) + + for action in actions: + if len(ret) == 0: + # bail out. could we maybe be smarter, like only add here? + break + + # for the mutation position, we can use any index in the payload, + # but we bias it towards indices of nonzero bytes. 
+ st_any_ix = st.integers(min_value=0, max_value=len(ret) - 1) + nonzero_indexes = [i for i, s in enumerate(ret) if s != 0] + if len(nonzero_indexes) > 0: + st_nonzero_ix = st.sampled_from(nonzero_indexes) + st_ix = st.one_of(st_any_ix, st_nonzero_ix) + else: + st_ix = st_any_ix + + ix = draw(st_ix) + + if action == "a": + ret.insert(ix, draw(st_byte)) + elif action == "e": + ret[ix] = draw(st_byte) + elif action == "d": + ret.pop(ix) + elif action == "w": + # splice word + st_uint256 = st.integers(min_value=0, max_value=2**256 - 1) + + # valid pointers, but maybe *just* out of bounds + st_poison = st.integers(min_value=-2 * len(ret), max_value=2 * len(ret)).map( + lambda x: x % (2**256) + ) + word = draw(st.one_of(st_poison, st_uint256)) + ret[ix - 31 : ix + 1] = word.to_bytes(32) + elif action == "s": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + # max splice is 64 bytes, due to MAX_BUFFER_SIZE limitation in st.binary + ix2 = ix + (ix2 % 64) + length = ix2 - ix + substr = draw(st.binary(min_size=length, max_size=length)) + ret[ix:ix2] = substr + elif action == "f": + ix2 = draw(st_ix) + ix, ix2 = _sort2(ix, ix2) + ix2 += 1 + for i in range(ix, ix2): + # flip the bits in the byte + ret[i] = 255 ^ ret[i] + else: + raise RuntimeError("unreachable") + + return bytes(ret) + + +@st.composite +def payload_from(draw, typ): + data = draw(data_for_type(typ)) + schema = typ.abi_type.selector_name() + payload = abi.encode(schema, data) + + return draw(_mutate(payload)) + + +_settings = dict( + report_multiple_bugs=False, + # verbosity=hp.Verbosity.verbose, + suppress_health_check=( + hp.HealthCheck.data_too_large, + hp.HealthCheck.too_slow, + hp.HealthCheck.large_base_example, + ), + phases=( + hp.Phase.explicit, + hp.Phase.reuse, + hp.Phase.generate, + hp.Phase.target, + # Phase.shrink, # can force long waiting for examples + # Phase.explain, # not helpful here + ), +) + + +@dataclass(frozen=True) +class _TypeStats: + nesting: int = 0 + num_dynamic_types: int 
= 0 # number of dynamic types in the type + breadth: int = 0 # e.g. int16[50] has higher breadth than int16[1] + width: int = 0 # size of type + + +def _type_stats(typ: VyperType) -> _TypeStats: + def _finalize(): # little trick to save re-typing the arguments + width = typ.memory_bytes_required + return _TypeStats( + nesting=nesting, num_dynamic_types=num_dynamic_types, breadth=breadth, width=width + ) + + if typ._is_prim_word: + nesting = 0 + breadth = 1 + num_dynamic_types = 0 + return _finalize() + + if isinstance(typ, (BytesT, StringT)): + nesting = 0 + breadth = 1 # idk + num_dynamic_types = 1 + return _finalize() + + if isinstance(typ, TupleT): + substats = [_type_stats(t) for t in typ.member_types] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(typ.length, *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + + if isinstance(typ, DArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = 1 + substat.num_dynamic_types + return _finalize() + + if isinstance(typ, SArrayT): + substat = _type_stats(typ.value_type) + nesting = 1 + substat.nesting + breadth = max(typ.count, substat.breadth) + num_dynamic_types = substat.num_dynamic_types + return _finalize() + + raise RuntimeError("unreachable") + + +@pytest.fixture(scope="module") +def payload_copier(get_contract_from_ir): + # some contract which will return the buffer passed to it + # note: hardcode the location of the bytestring + ir = [ + "with", + "length", + ["calldataload", 36], + ["seq", ["calldatacopy", 0, 68, "length"], ["return", 0, "length"]], + ] + return get_contract_from_ir(["deploy", 0, ir, 0]) + + +PARALLELISM = 1 # increase on fuzzer box + + +# NOTE: this is a heavy test. 100 types * 100 payloads per type can take +# 3-4minutes on a regular CPU core. 
+@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): + # import time + # t0 = time.time() + # print("ENTER", typ) + + wrapped_type = calculate_type_for_external_return(typ) + + stats = _type_stats(typ) + # for k, v in asdict(stats).items(): + # event(k, v) + hp.target(stats.num_dynamic_types) + # hp.target(typ.abi_type.is_dynamic() + typ.abi_type.is_complex_type())) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = wrapped_type.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + type_str = repr(typ) # annotation in vyper code + # TODO: intrinsic decode from staticcall/extcall + # TODO: _abi_decode from other sources (staticcall/extcall?) + # TODO: dirty the buffer + # TODO: check unwrap_tuple=False + code = f""" +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}) + return ret + +interface Foo: + def foo(xs: Bytes[{buffer_bound}]) -> {type_str}: view # STATICCALL + def bar(xs: Bytes[{buffer_bound}]) -> {type_str}: nonpayable # CALL + +@external +def run2(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return staticcall copier.foo(xs) + +@external +def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: + assert len(xs) <= {type_bound} + return (extcall copier.bar(xs)) + """ + c = get_contract(code) + + @hp.given(data=payload_from(wrapped_type)) + @hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(f"type: {typ}") + hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") + hp.note(code) + hp.note(data.hex()) + + try: + expected = spec_decode(wrapped_type, data) + + # unwrap if necessary + if needs_external_call_wrap(typ): + assert 
isinstance(expected, tuple) + (expected,) = expected + + hp.note(f"expected {expected}") + assert expected == c.run(data) + assert expected == c.run2(data, payload_copier.address) + assert expected == c.run3(data, payload_copier.address) + + except DecodeError: + # note EvmError includes reverts *and* exceptional halts. + # we can get OOG during abi decoding due to how + # `_abi_payload_size()` works + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + with tx_failed(EvmError): + c.run2(data, payload_copier.address) + with tx_failed(EvmError): + c.run3(data, payload_copier.address) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index ff0f801d74..9a0a08097c 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -1169,8 +1169,12 @@ def clamp_bytestring(ir_node, hi=None): if hi is not None: assert t.maxlen < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by maxlen. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) len_check = ["seq", ["assert", ["le", item_end, hi]], len_check] @@ -1189,8 +1193,12 @@ def clamp_dyn_array(ir_node, hi=None): if hi is not None: assert t.count < 2**64 # sanity check - # note: this add does not risk arithmetic overflow because + # NOTE: this add does not risk arithmetic overflow because # length is bounded by count * elemsize. + # however(!) _abi_payload_size can OOG, since it loads the word + # at `ir_node` to find the length of the bytearray, which could + # be out-of-bounds. + # if we didn't get OOG, we could overflow in `add`. 
item_end = add_ofst(ir_node, _abi_payload_size(ir_node)) # if the subtype is dynamic, the length check is performed in diff --git a/vyper/semantics/types/subscriptable.py b/vyper/semantics/types/subscriptable.py index c392ff48b1..4068d815d2 100644 --- a/vyper/semantics/types/subscriptable.py +++ b/vyper/semantics/types/subscriptable.py @@ -334,7 +334,10 @@ def __init__(self, member_types: Tuple[VyperType, ...]) -> None: self.key_type = UINT256_T # API Compatibility def __repr__(self): - return "(" + ", ".join(repr(t) for t in self.member_types) + ")" + if len(self.member_types) == 1: + (t,) = self.member_types + return f"({t},)" + return "(" + ", ".join(f"{t}" for t in self.member_types) + ")" @property def length(self): From 2d82a74937edeed5e9d4c0c8cecd78a0d70530fa Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 04:10:01 -0700 Subject: [PATCH 02/12] feat[test]: add more coverage to `abi_decode` fuzzer tests (#4153) fuzz with `unwrap_tuple=False` add fuzzing for structs follow up to 69e5c0541a9b23 --- .../builtins/codegen/test_abi_decode_fuzz.py | 124 +++++++++++++++--- vyper/semantics/types/user.py | 11 +- 2 files changed, 115 insertions(+), 20 deletions(-) diff --git a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py index d12b2cde7e..e215002446 100644 --- a/tests/functional/builtins/codegen/test_abi_decode_fuzz.py +++ b/tests/functional/builtins/codegen/test_abi_decode_fuzz.py @@ -18,12 +18,12 @@ IntegerT, SArrayT, StringT, + StructT, TupleT, VyperType, _get_primitive_types, _get_sequence_types, ) -from vyper.semantics.types.shortcuts import UINT256_T from .abi_decode import DecodeError, spec_decode @@ -39,7 +39,7 @@ continue type_ctors.append(t) -complex_static_ctors = [SArrayT, TupleT] +complex_static_ctors = [SArrayT, TupleT, StructT] complex_dynamic_ctors = [DArrayT] leaf_ctors = [t for t in type_ctors if t not in _get_sequence_types().values()] static_leaf_ctors = [t 
for t in leaf_ctors if t._is_prim_word] @@ -50,10 +50,12 @@ @st.composite # max type nesting -def vyper_type(draw, nesting=3, skip=None): +def vyper_type(draw, nesting=3, skip=None, source_fragments=None): assert nesting >= 0 skip = skip or [] + if source_fragments is None: + source_fragments = [] st_leaves = st.one_of(st.sampled_from(dynamic_leaf_ctors), st.sampled_from(static_leaf_ctors)) st_complex = st.one_of( @@ -71,39 +73,52 @@ def vyper_type(draw, nesting=3, skip=None): # note: maybe st.deferred is good here, we could define it with # mutual recursion def _go(skip=skip): - return draw(vyper_type(nesting=nesting - 1, skip=skip)) + _, typ = draw(vyper_type(nesting=nesting - 1, skip=skip, source_fragments=source_fragments)) + return typ + + def finalize(typ): + return source_fragments, typ if t in (BytesT, StringT): # arbitrary max_value bound = draw(st.integers(min_value=1, max_value=1024)) - return t(bound) + return finalize(t(bound)) if t == SArrayT: subtype = _go(skip=[TupleT, BytesT, StringT]) bound = draw(st.integers(min_value=1, max_value=6)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == DArrayT: subtype = _go(skip=[TupleT]) bound = draw(st.integers(min_value=1, max_value=16)) - return t(subtype, bound) + return finalize(t(subtype, bound)) if t == TupleT: # zero-length tuples are not allowed in vyper n = draw(st.integers(min_value=1, max_value=6)) subtypes = [_go() for _ in range(n)] - return TupleT(subtypes) + return finalize(TupleT(subtypes)) + + if t == StructT: + n = draw(st.integers(min_value=1, max_value=6)) + subtypes = {f"x{i}": _go() for i in range(n)} + _id = len(source_fragments) # poor man's unique id + name = f"MyStruct{_id}" + typ = StructT(name, subtypes) + source_fragments.append(typ.def_source_str()) + return finalize(StructT(name, subtypes)) if t in (BoolT, AddressT): - return t() + return finalize(t()) if t == IntegerT: signed = draw(st.booleans()) bits = 8 * draw(st.integers(min_value=1, max_value=32)) - 
return t(signed, bits) + return finalize(t(signed, bits)) if t == BytesM_T: m = draw(st.integers(min_value=1, max_value=32)) - return t(m) + return finalize(t(m)) raise RuntimeError("unreachable") @@ -116,6 +131,9 @@ def _go(t): if isinstance(typ, TupleT): return tuple(_go(item_t) for item_t in typ.member_types) + if isinstance(typ, StructT): + return tuple(_go(item_t) for item_t in typ.tuple_members()) + if isinstance(typ, SArrayT): return [_go(typ.value_type) for _ in range(typ.length)] @@ -294,6 +312,13 @@ def _finalize(): # little trick to save re-typing the arguments num_dynamic_types = sum(s.num_dynamic_types for s in substats) return _finalize() + if isinstance(typ, StructT): + substats = [_type_stats(t) for t in typ.tuple_members()] + nesting = 1 + max(s.nesting for s in substats) + breadth = max(len(typ.member_types), *[s.breadth for s in substats]) + num_dynamic_types = sum(s.num_dynamic_types for s in substats) + return _finalize() + if isinstance(typ, DArrayT): substat = _type_stats(typ.value_type) nesting = 1 + substat.nesting @@ -332,8 +357,8 @@ def payload_copier(get_contract_from_ir): @pytest.mark.parametrize("_n", list(range(PARALLELISM))) @hp.given(typ=vyper_type()) @hp.settings(max_examples=100, **_settings) -@hp.example(typ=DArrayT(DArrayT(UINT256_T, 2), 2)) -def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): +def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier, env): + source_fragments, typ = typ # import time # t0 = time.time() # print("ENTER", typ) @@ -350,12 +375,13 @@ def test_abi_decode_fuzz(_n, typ, get_contract, tx_failed, payload_copier): # by bytes length check at function entry type_bound = wrapped_type.abi_type.size_bound() buffer_bound = type_bound + MAX_MUTATIONS - type_str = repr(typ) # annotation in vyper code - # TODO: intrinsic decode from staticcall/extcall - # TODO: _abi_decode from other sources (staticcall/extcall?) 
- # TODO: dirty the buffer - # TODO: check unwrap_tuple=False + + preamble = "\n\n".join(source_fragments) + type_str = str(typ) # annotation in vyper code + code = f""" +{preamble} + @external def run(xs: Bytes[{buffer_bound}]) -> {type_str}: ret: {type_str} = abi_decode(xs, {type_str}) @@ -375,6 +401,13 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: assert len(xs) <= {type_bound} return (extcall copier.bar(xs)) """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + # print(code) + hp.note(code) c = get_contract(code) @hp.given(data=payload_from(wrapped_type)) @@ -382,7 +415,6 @@ def run3(xs: Bytes[{buffer_bound}], copier: Foo) -> {type_str}: def _fuzz(data): hp.note(f"type: {typ}") hp.note(f"abi_t: {wrapped_type.abi_type.selector_name()}") - hp.note(code) hp.note(data.hex()) try: @@ -414,3 +446,57 @@ def _fuzz(data): # t1 = time.time() # print(f"elapsed {t1 - t0}s") + + +@pytest.mark.parametrize("_n", list(range(PARALLELISM))) +@hp.given(typ=vyper_type()) +@hp.settings(max_examples=100, **_settings) +def test_abi_decode_no_wrap_fuzz(_n, typ, get_contract, tx_failed, env): + source_fragments, typ = typ + # import time + # t0 = time.time() + # print("ENTER", typ) + + stats = _type_stats(typ) + hp.target(stats.num_dynamic_types) + + # add max_mutations bytes worth of padding so we don't just get caught + # by bytes length check at function entry + type_bound = typ.abi_type.size_bound() + buffer_bound = type_bound + MAX_MUTATIONS + + type_str = str(typ) # annotation in vyper code + preamble = "\n\n".join(source_fragments) + + code = f""" +{preamble} + +@external +def run(xs: Bytes[{buffer_bound}]) -> {type_str}: + ret: {type_str} = abi_decode(xs, {type_str}, unwrap_tuple=False) + return ret + """ + try: + c = get_contract(code) + except EvmError as e: + if env.contract_size_limit_error in str(e): + hp.assume(False) + + @hp.given(data=payload_from(typ)) + 
@hp.settings(max_examples=100, **_settings) + def _fuzz(data): + hp.note(code) + hp.note(data.hex()) + try: + expected = spec_decode(typ, data) + hp.note(f"expected {expected}") + assert expected == c.run(data) + except DecodeError: + hp.note("expect failure") + with tx_failed(EvmError): + c.run(data) + + _fuzz() + + # t1 = time.time() + # print(f"elapsed {t1 - t0}s") diff --git a/vyper/semantics/types/user.py b/vyper/semantics/types/user.py index a6ee646e62..ca8e99bc92 100644 --- a/vyper/semantics/types/user.py +++ b/vyper/semantics/types/user.py @@ -371,8 +371,11 @@ def from_StructDef(cls, base_node: vy_ast.StructDef) -> "StructT": return cls(struct_name, members, ast_def=base_node) + def __str__(self): + return f"{self._id}" + def __repr__(self): - return f"{self._id} declaration object" + return f"{self._id} {self.members}" def _try_fold(self, node): if len(node.args) != 1: @@ -384,6 +387,12 @@ def _try_fold(self, node): # it can't be reduced, but this lets upstream code know it's constant return node + def def_source_str(self): + ret = f"struct {self._id}:\n" + for k, v in self.member_types.items(): + ret += f" {k}: {v}\n" + return ret + @property def size_in_bytes(self): return sum(i.size_in_bytes for i in self.member_types.values()) From c79c0b658ba34d7b161048d0d80ebd207ff5247b Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 08:22:50 -0700 Subject: [PATCH 03/12] fix[venom]: alloca for default arguments (#4155) this commit fixes an `ir_node_to_venom` translation bug. when there is a default argument to an external function, it can generate multiple allocas, because the entry points allocate separate symbol tables, but actually they should all correspond to the same alloca. 
for instance, `external 1 foo(uint256)12345` and `external 1 foo()67890` both feed into the same `external 1 foo()__common`, but the current translator mistakenly creates different symbol tables for the two "feeder" entry points, resulting in separate allocas for the same logical variable. this commit fixes the bug by fusing the symbol tables for multiple entry points to the same external function. --- vyper/venom/ir_node_to_venom.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 2c99cf5668..4fca95be90 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -107,14 +107,16 @@ NOOP_INSTRUCTIONS = frozenset(["pass", "cleanup_repeat", "var_list", "unique_symbol"]) SymbolTable = dict[str, Optional[IROperand]] -_global_symbols: SymbolTable = {} +_global_symbols: SymbolTable = None # type: ignore MAIN_ENTRY_LABEL_NAME = "__main_entry" +_external_functions: dict[int, SymbolTable] = None # type: ignore # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: - global _global_symbols + global _global_symbols, _external_functions _global_symbols = {} + _external_functions = {} ctx = IRContext() fn = ctx.create_function(MAIN_ENTRY_LABEL_NAME) @@ -214,10 +216,6 @@ def _convert_ir_bb_list(fn, ir, symbols): return ret -current_func = None -var_list: list[str] = [] - - def pop_source_on_return(func): @functools.wraps(func) def pop_source(*args, **kwargs): @@ -232,7 +230,10 @@ def pop_source(*args, **kwargs): @pop_source_on_return def _convert_ir_bb(fn, ir, symbols): assert isinstance(ir, IRnode), ir - global _break_target, _continue_target, current_func, var_list, _global_symbols + # TODO: refactor these to not be globals + global _break_target, _continue_target, _global_symbols, _external_functions + + # keep a map from external functions to all possible entry points ctx = fn.ctx fn.push_source(ir) @@ -274,7 
+275,6 @@ def _convert_ir_bb(fn, ir, symbols): return ret elif is_external: - _global_symbols = {} ret = _convert_ir_bb(fn, ir.args[0], symbols) _append_return_args(fn) else: @@ -382,6 +382,13 @@ def _convert_ir_bb(fn, ir, symbols): data = _convert_ir_bb(fn, c, symbols) ctx.append_data("db", [data]) # type: ignore elif ir.value == "label": + function_id_pattern = r"external (\d+)" + function_name = ir.args[0].value + m = re.match(function_id_pattern, function_name) + if m is not None: + function_id = m.group(1) + _global_symbols = _external_functions.setdefault(function_id, {}) + label = IRLabel(ir.args[0].value, True) bb = fn.get_basic_block() if not bb.is_terminated: From a72488ce68125a65813199f9b1188ce60a987feb Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Mon, 17 Jun 2024 14:23:30 -0700 Subject: [PATCH 04/12] fix[venom]: add `unique_symbols` check to venom pipeline (#4149) when `-Onone` is specified along with `--experimental-codegen`, the unique symbols check does not get run. this calculates the `ir_node.unique_symbols` property, which implicitly runs the unique symbols check. 
also, change an assertion to a proper panic exception --- vyper/codegen/ir_node.py | 3 ++- vyper/venom/ir_node_to_venom.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 9d39ebd033..97d9c45fb6 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -405,7 +405,8 @@ def unique_symbols(self): for arg in children: s = arg.unique_symbols non_uniques = ret.intersection(s) - assert len(non_uniques) == 0, f"non-unique symbols {non_uniques}" + if len(non_uniques) != 0: # pragma: nocover + raise CompilerPanic(f"non-unique symbols {non_uniques}") ret |= s return ret diff --git a/vyper/venom/ir_node_to_venom.py b/vyper/venom/ir_node_to_venom.py index 4fca95be90..85172c70e1 100644 --- a/vyper/venom/ir_node_to_venom.py +++ b/vyper/venom/ir_node_to_venom.py @@ -114,6 +114,8 @@ # convert IRnode directly to venom def ir_node_to_venom(ir: IRnode) -> IRContext: + _ = ir.unique_symbols # run unique symbols check + global _global_symbols, _external_functions _global_symbols = {} _external_functions = {} From d92cd344add84aa17434baefed24a6c548471cc2 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 10:23:37 -0700 Subject: [PATCH 05/12] chore[docs]: add evaluation order warning for builtins (#4158) some builtins have undefined order of evaluation of arguments; make a note in the docs --- docs/built-in-functions.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/built-in-functions.rst b/docs/built-in-functions.rst index 367a08d80d..a0e424adb4 100644 --- a/docs/built-in-functions.rst +++ b/docs/built-in-functions.rst @@ -1090,3 +1090,6 @@ Utilities .. note:: Issuing of the static call is *NOT* mode-dependent (that is, it is not removed from production code), although the compiler will issue a warning whenever ``print`` is used. + +.. warning:: + In Vyper, as of v0.4.0, the order of argument evaluation of builtins is not defined. 
That means that the compiler may choose to reorder evaluation of arguments. For example, ``extract32(x(), y())`` may yield unexpected results if ``x()`` and ``y()`` both touch the same data. For this reason, it is best to avoid calling functions with side-effects inside of builtins. For more information, see `GHSA-g2xh-c426-v8mf <https://github.com/vyperlang/vyper/security/advisories/GHSA-g2xh-c426-v8mf>`_ and `issue #4019 <https://github.com/vyperlang/vyper/issues/4019>`_. From 3d9c537142fb99b2672f21e2057f5f202cde194f Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Tue, 18 Jun 2024 12:49:36 -0700 Subject: [PATCH 06/12] fix[codegen]: panic on potential eval order issue for some builtins (#4157) `extract32()` and `slice()` have an evaluation order issue when the arguments touch the same data. specifically, the length and data evaluation are interleaved with the index/start/length evaluations. in unusual situations (such as those in the included test cases), this can result in "invalid" reads where the data and length reads appear out of order. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected. 
--------- Co-authored-by: trocher Co-authored-by: cyberthirst --- .../builtins/codegen/test_extract32.py | 48 +++++++++++++++++ .../functional/builtins/codegen/test_slice.py | 52 ++++++++++++++++++- vyper/builtins/functions.py | 7 +++ 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/functional/builtins/codegen/test_extract32.py b/tests/functional/builtins/codegen/test_extract32.py index 8a92adbb07..f8db51ee36 100644 --- a/tests/functional/builtins/codegen/test_extract32.py +++ b/tests/functional/builtins/codegen/test_extract32.py @@ -1,6 +1,7 @@ import pytest from vyper.evm.opcodes import version_check +from vyper.exceptions import CompilerPanic @pytest.mark.parametrize("location", ["storage", "transient"]) @@ -98,3 +99,50 @@ def foq(inp: Bytes[32]) -> address: with tx_failed(): c.foq(b"crow" * 8) + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval(get_contract): + extract32_code = """ +var:DynArray[Bytes[96], 1] + +@internal +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], self.bar(), output_type=bytes32) + """ + + c = get_contract(extract32_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_extract32_order_of_eval_extcall(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> bytes32: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return extract32(self.var[0], extcall Bar(self).bar(), output_type=bytes32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/tests/functional/builtins/codegen/test_slice.py b/tests/functional/builtins/codegen/test_slice.py index 08800e7a8c..d5d1efca0f 100644 --- a/tests/functional/builtins/codegen/test_slice.py +++ b/tests/functional/builtins/codegen/test_slice.py @@ -5,7 +5,7 @@ from vyper.compiler import compile_code from vyper.compiler.settings import OptimizationLevel, Settings from vyper.evm.opcodes import version_check -from vyper.exceptions import ArgumentException, TypeMismatch +from vyper.exceptions import ArgumentException, CompilerPanic, TypeMismatch _fun_bytes32_bounds = [(0, 32), (3, 29), (27, 5), (0, 5), (5, 3), (30, 2)] @@ -562,3 +562,53 @@ def foo(cs: String[64]) -> uint256: c = get_contract(code) # ensure that counter was incremented only once assert c.foo(arg) == 1 + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 32 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], 3, extcall Bar(self).bar()) + """ + + c = get_contract(slice_code) + assert c.foo() == b"defghijklmnopqrstuvwxyz123456789" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_slice_order_of_eval2(get_contract): + slice_code = """ +var:DynArray[Bytes[96], 1] + +interface Bar: + def bar() -> uint256: payable + +@external +def bar() -> uint256: + self.var[0] = b'hellohellohellohellohellohellohello' + self.var.pop() + return 3 + +@external +def foo() -> Bytes[96]: + self.var = [b'abcdefghijklmnopqrstuvwxyz123456789'] + return slice(self.var[0], extcall Bar(self).bar(), 32) + """ + + c = get_contract(slice_code) + assert c.foo() == 
b"defghijklmnopqrstuvwxyz123456789" diff --git a/vyper/builtins/functions.py b/vyper/builtins/functions.py index 2564329b65..672d978455 100644 --- a/vyper/builtins/functions.py +++ b/vyper/builtins/functions.py @@ -29,6 +29,7 @@ get_type_for_exact_size, ir_tuple_from_args, make_setter, + potential_overlap, promote_signed_int, sar, shl, @@ -357,6 +358,9 @@ def build_IR(self, expr, args, kwargs, context): assert is_bytes32, src src = ensure_in_memory(src, context) + if potential_overlap(src, start) or potential_overlap(src, length): + raise CompilerPanic("risky overlap") + with src.cache_when_complex("src") as (b1, src), start.cache_when_complex("start") as ( b2, start, @@ -862,6 +866,9 @@ def build_IR(self, expr, args, kwargs, context): bytez, index = args ret_type = kwargs["output_type"] + if potential_overlap(bytez, index): + raise CompilerPanic("risky overlap") + def finalize(ret): annotation = "extract32" ret = IRnode.from_list(ret, typ=ret_type, annotation=annotation) From 4594f8badf13a583875f8891698cd3bbefb1c787 Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Wed, 19 Jun 2024 12:38:33 -0700 Subject: [PATCH 07/12] fix[codegen]: panic on potential subscript eval order issue (#4159) subscript expressions have an evaluation order issue when evaluation of the index (i.e. `node.index`) modifies the parent (i.e. `node.value`). because the evaluation of the parent is interleaved with evaluation of the index, it can result in "invalid" reads where the length check occurs before evaluation of the index, but the data read occurs afterwards. if evaluation of the index results in modification of the container size for instance, the data read from the container can happen on a dangling reference. another variant of this issue would be accessing `self.nested_array.pop().append(...)`; however, this currently happens to be blocked by a panic in the frontend. this commit conservatively blocks compilation if the preconditions for the interleaved evaluation are detected. 
POC tests that the appropriate panics are generated are included as well. --------- Co-authored-by: trocher Co-authored-by: Hubert Ritzdorf Co-authored-by: cyberthirst --- .../codegen/types/test_array_indexing.py | 77 +++++++++++++++++++ .../codegen/types/test_dynamic_array.py | 16 ++++ vyper/ast/nodes.pyi | 1 + vyper/codegen/core.py | 20 +++++ vyper/codegen/expr.py | 7 ++ vyper/codegen/ir_node.py | 12 +++ vyper/semantics/analysis/utils.py | 15 +++- 7 files changed, 147 insertions(+), 1 deletion(-) diff --git a/tests/functional/codegen/types/test_array_indexing.py b/tests/functional/codegen/types/test_array_indexing.py index 45e777d919..7f5c0d0e21 100644 --- a/tests/functional/codegen/types/test_array_indexing.py +++ b/tests/functional/codegen/types/test_array_indexing.py @@ -1,5 +1,9 @@ # TODO: rewrite the tests in type-centric way, parametrize array and indices types +import pytest + +from vyper.exceptions import CompilerPanic + def test_negative_ix_access(get_contract, tx_failed): # Arrays can't be accessed with negative indices @@ -130,3 +134,76 @@ def foo(): c.foo() for i in range(10): assert c.arr(i) == i + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap(get_contract): + code = """ +a: public(DynArray[DynArray[Bytes[96], 5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][self.bar()] + + +@internal +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + # tricky to get this right, for now we just panic instead of generating code + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall(get_contract): + code = """ + +interface Bar: + def bar() -> uint256: payable + +a: public(DynArray[DynArray[Bytes[96], 
5], 5]) + +@external +def foo() -> Bytes[96]: + self.a.append([b'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx']) + return self.a[0][extcall Bar(self).bar()] + + +@external +def bar() -> uint256: + self.a[0] = [b'yyy'] + self.a.pop() + return 0 + """ + c = get_contract(code) + assert c.foo() == b"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + + +# to fix in future release +@pytest.mark.xfail(raises=CompilerPanic, reason="risky overlap") +def test_array_index_overlap_extcall2(get_contract): + code = """ +interface B: + def calculate_index() -> uint256: nonpayable + +a: HashMap[uint256, DynArray[uint256, 5]] + +@external +def bar() -> uint256: + self.a[0] = [2] + return self.a[0][extcall B(self).calculate_index()] + +@external +def calculate_index() -> uint256: + self.a[0] = [1] + return 0 + """ + c = get_contract(code) + + assert c.bar() == 1 diff --git a/tests/functional/codegen/types/test_dynamic_array.py b/tests/functional/codegen/types/test_dynamic_array.py index 5f26e05839..2a0f4e77e5 100644 --- a/tests/functional/codegen/types/test_dynamic_array.py +++ b/tests/functional/codegen/types/test_dynamic_array.py @@ -8,6 +8,7 @@ from vyper.exceptions import ( ArgumentException, ArrayIndexException, + CompilerPanic, ImmutableViolation, OverflowException, StackTooDeep, @@ -1887,3 +1888,18 @@ def boo() -> uint256: c = get_contract(code) assert c.foo() == [1, 2, 3, 4] + + +@pytest.mark.xfail(raises=CompilerPanic) +def test_dangling_reference(get_contract, tx_failed): + code = """ +a: DynArray[DynArray[uint256, 5], 5] + +@external +def foo(): + self.a = [[1]] + self.a.pop().append(2) + """ + c = get_contract(code) + with tx_failed(): + c.foo() diff --git a/vyper/ast/nodes.pyi b/vyper/ast/nodes.pyi index 1c7aaf55ee..58c7d0b2e4 100644 --- a/vyper/ast/nodes.pyi +++ b/vyper/ast/nodes.pyi @@ -23,6 +23,7 @@ class VyperNode: end_col_offset: int = ... _metadata: dict = ... _original_node: Optional[VyperNode] = ... + _children: list[VyperNode] = ... 
def __init__(self, parent: Optional[VyperNode] = ..., **kwargs: Any) -> None: ... def __hash__(self) -> Any: ... def __eq__(self, other: Any) -> Any: ... diff --git a/vyper/codegen/core.py b/vyper/codegen/core.py index 9a0a08097c..25a6d06fbf 100644 --- a/vyper/codegen/core.py +++ b/vyper/codegen/core.py @@ -924,6 +924,26 @@ def potential_overlap(left, right): return False +# similar to `potential_overlap()`, but compares left's _reads_ vs +# right's _writes_. +# TODO: `potential_overlap()` can probably be replaced by this function, +# but all the cases need to be checked. +def read_write_overlap(left, right): + if not isinstance(left, IRnode) or not isinstance(right, IRnode): + return False + + if left.typ._is_prim_word and right.typ._is_prim_word: + return False + + if len(left.referenced_variables & right.variable_writes) > 0: + return True + + if len(left.referenced_variables) > 0 and right.contains_risky_call: + return True + + return False + + # Create an x=y statement, where the types may be compound def make_setter(left, right, hi=None): check_assign(left, right) diff --git a/vyper/codegen/expr.py b/vyper/codegen/expr.py index 65df5a0930..f28a068be6 100644 --- a/vyper/codegen/expr.py +++ b/vyper/codegen/expr.py @@ -21,6 +21,7 @@ make_setter, pop_dyn_array, potential_overlap, + read_write_overlap, sar, shl, shr, @@ -40,6 +41,7 @@ UnimplementedException, tag_exceptions, ) +from vyper.semantics.analysis.utils import get_expr_writes from vyper.semantics.types import ( AddressT, BoolT, @@ -86,6 +88,9 @@ def __init__(self, node, context, is_stmt=False): self.ir_node = fn() assert isinstance(self.ir_node, IRnode), self.ir_node + writes = set(access.variable for access in get_expr_writes(self.expr)) + self.ir_node._writes = writes + self.ir_node.annotation = self.expr.get("node_source_code") self.ir_node.ast_source = self.expr @@ -352,6 +357,8 @@ def parse_Subscript(self): elif is_array_like(sub.typ): index = Expr.parse_value_expr(self.expr.slice, self.context) + if 
read_write_overlap(sub, index): + raise CompilerPanic("risky overlap") elif is_tuple_like(sub.typ): # should we annotate expr.slice in the frontend with the diff --git a/vyper/codegen/ir_node.py b/vyper/codegen/ir_node.py index 97d9c45fb6..6f9eb0359b 100644 --- a/vyper/codegen/ir_node.py +++ b/vyper/codegen/ir_node.py @@ -467,6 +467,18 @@ def referenced_variables(self): return ret + @cached_property + def variable_writes(self): + ret = getattr(self, "_writes", set()) + + for arg in self.args: + ret |= arg.variable_writes + + if getattr(self, "is_self_call", False): + ret |= self.invoked_function_ir.func_ir.variable_writes + + return ret + @cached_property def contains_risky_call(self): ret = self.value in ("call", "delegatecall", "staticcall", "create", "create2") diff --git a/vyper/semantics/analysis/utils.py b/vyper/semantics/analysis/utils.py index be323b1d13..d30eee79e0 100644 --- a/vyper/semantics/analysis/utils.py +++ b/vyper/semantics/analysis/utils.py @@ -24,7 +24,7 @@ from vyper.semantics.types.bytestrings import BytesT, StringT from vyper.semantics.types.primitives import AddressT, BoolT, BytesM_T, IntegerT from vyper.semantics.types.subscriptable import DArrayT, SArrayT, TupleT -from vyper.utils import checksum_encode, int_to_fourbytes +from vyper.utils import OrderedSet, checksum_encode, int_to_fourbytes def _validate_op(node, types_list, validation_fn_name): @@ -681,3 +681,16 @@ def check_modifiability(node: vy_ast.ExprNode, modifiability: Modifiability) -> info = get_expr_info(node) return info.modifiability <= modifiability + + +# TODO: move this into part of regular analysis in `local.py` +def get_expr_writes(node: vy_ast.VyperNode) -> OrderedSet[VarAccess]: + if "writes_r" in node._metadata: + return node._metadata["writes_r"] + ret: OrderedSet = OrderedSet() + if isinstance(node, vy_ast.ExprNode) and node._expr_info is not None: + ret = node._expr_info._writes + for c in node._children: + ret |= get_expr_writes(c) + node._metadata["writes_r"] = 
ret + return ret From e9db8d9f7486eae38f5b86531629019ad28f514e Mon Sep 17 00:00:00 2001 From: Charles Cooper Date: Thu, 20 Jun 2024 09:27:25 -0700 Subject: [PATCH 08/12] feat[docs]: v0.4.0 release (#4152) add release notes for v0.4.0 release slight update to wording of front matter --- docs/index.rst | 7 +- docs/release-notes.rst | 310 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 311 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 5baaebb339..6c36b5fd7c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,14 +6,17 @@ Vyper ##### -Vyper is a contract-oriented, pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +Vyper is a contract-oriented, Pythonic programming language that targets the `Ethereum Virtual Machine (EVM) `_. +It prioritizes user safety, encourages clear coding practices via language design and efficient execution. In other words, Vyper code is safe, clear and efficient! Principles and Goals ==================== * **Security**: It should be possible and natural to build secure smart-contracts in Vyper. * **Language and compiler simplicity**: The language and the compiler implementation should strive to be simple. -* **Auditability**: Vyper code should be maximally human-readable. Furthermore, it should be maximally difficult to write misleading code. Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. +* **Auditability**: Vyper code should be maximally human-readable. + Furthermore, it should be maximally difficult to write misleading code. + Simplicity for the reader is more important than simplicity for the writer, and simplicity for readers with low prior experience with Vyper (and low prior experience with programming in general) is particularly important. 
Because of this Vyper provides the following features: diff --git a/docs/release-notes.rst b/docs/release-notes.rst index c107ee5554..fa17ef4f7b 100644 --- a/docs/release-notes.rst +++ b/docs/release-notes.rst @@ -11,17 +11,319 @@ Release Notes :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/pull\/)(\d+)/(`#\2 <\1\2>`_)/g ex. in: https://github.com/vyperlang/vyper/pull/3373 ex. out: (`#3373 `_) + remove authorship slugs (leave them on github release page; they have no meaning outside of github though) + :'<,'>s/by @\S\+ //c for advisory links: :'<,'>s/\v(https:\/\/github.com\/vyperlang\/vyper\/security\/advisories\/)([-A-Za-z0-9]+)/(`\2 <\1\2>`_)/g -v0.4.0b1 ("Nagini") -******************* +v0.4.0 ("Nagini") +***************** -Date released: TBD -================== +Date released: 2024-06-20 +========================= v0.4.0 represents a major overhaul to the Vyper language. Notably, it overhauls the import system and adds support for code reuse. It also adds a new, experimental backend to Vyper which lays the foundation for improved analysis, optimization and integration with third party tools. 
+Breaking Changes +---------------- +* feat[tool]!: make cancun the default evm version (`#4029 `_) +* feat[lang]: remove named reentrancy locks (`#3769 `_) +* feat[lang]!: change the signature of ``block.prevrandao`` (`#3879 `_) +* feat[lang]!: change ABI type of ``decimal`` to ``int168`` (`#3696 `_) +* feat[lang]: rename ``_abi_encode`` and ``_abi_decode`` (`#4097 `_) +* feat[lang]!: add feature flag for decimals (`#3930 `_) +* feat[lang]!: make internal decorator optional (`#4040 `_) +* feat[lang]: protect external calls with keyword (`#2938 `_) +* introduce floordiv, ban regular div for integers (`#2937 `_) +* feat[lang]: use keyword arguments for struct instantiation (`#3777 `_) +* feat: require type annotations for loop variables (`#3596 `_) +* feat: replace ``enum`` with ``flag`` keyword (`#3697 `_) +* feat: remove builtin constants (`#3350 `_) +* feat: drop istanbul and berlin support (`#3843 `_) +* feat: allow range with two arguments and bound (`#3679 `_) +* fix[codegen]: range bound check for signed integers (`#3814 `_) +* feat: default code offset = 3 (`#3454 `_) +* feat: rename ``vyper.interfaces`` to ``ethereum.ercs`` (`#3741 `_) +* chore: add prefix to ERC interfaces (`#3804 `_) +* chore[ux]: compute natspec as part of standard pipeline (`#3946 `_) +* feat: deprecate ``vyper-serve`` (`#3666 `_) + +Module system +------------- +* refactor: internal handling of imports (`#3655 `_) +* feat: implement "stateless" modules (`#3663 `_) +* feat[lang]: export interfaces (`#3919 `_) +* feat[lang]: singleton modules with ownership hierarchy (`#3729 `_) +* feat[lang]: implement function exports (`#3786 `_) +* feat[lang]: auto-export events in ABI (`#3808 `_) +* fix: allow using interface defs from imported modules (`#3725 `_) +* feat: add support for constants in imported modules (`#3726 `_) +* fix[lang]: prevent modules as storage variables (`#4088 `_) +* fix[ux]: improve initializer hint for unimported modules (`#4145 `_) +* feat: add python ``sys.path`` to 
vyper path (`#3763 `_) +* feat[ux]: improve error message for importing ERC20 (`#3816 `_) +* fix[lang]: fix importing of flag types (`#3871 `_) +* feat: search path resolution for cli (`#3694 `_) +* fix[lang]: transitive exports (`#3888 `_) +* fix[ux]: error messages relating to initializer issues (`#3831 `_) +* fix[lang]: recursion in ``uses`` analysis for nonreentrant functions (`#3971 `_) +* fix[ux]: fix ``uses`` error message (`#3926 `_) +* fix[lang]: fix ``uses`` analysis for nonreentrant functions (`#3927 `_) +* fix[lang]: fix a hint in global initializer check (`#4089 `_) +* fix[lang]: builtin type comparisons (`#3956 `_) +* fix[tool]: fix ``combined_json`` output for CLI (`#3901 `_) +* fix[tool]: compile multiple files (`#4053 `_) +* refactor: reimplement AST folding (`#3669 `_) +* refactor: constant folding (`#3719 `_) +* fix[lang]: typecheck hashmap indexes with folding (`#4007 `_) +* fix[lang]: fix array index checks when the subscript is folded (`#3924 `_) +* fix[lang]: pure access analysis (`#3895 `_) + +Venom +----- +* feat: implement new IR for vyper (venom IR) (`#3659 `_) +* feat[ir]: add ``make_ssa`` pass to venom pipeline (`#3825 `_) +* feat[venom]: implement ``mem2var`` and ``sccp`` passes (`#3941 `_) +* feat[venom]: add store elimination pass (`#4021 `_) +* feat[venom]: add ``extract_literals`` pass (`#4067 `_) +* feat[venom]: optimize branching (`#4049 `_) +* feat[venom]: avoid last ``swap`` for commutative ops (`#4048 `_) +* feat[venom]: "pickaxe" stack scheduler optimization (`#3951 `_) +* feat[venom]: add algebraic optimization pass (`#4054 `_) +* feat: Implement target constrained venom jump instruction (`#3687 `_) +* feat: remove ``deploy`` instruction from venom (`#3703 `_) +* fix[venom]: liveness analysis in some loops (`#3732 `_) +* feat: add more venom instructions (`#3733 `_) +* refactor[venom]: use venom pass instances (`#3908 `_) +* refactor[venom]: refactor venom operand classes (`#3915 `_) +* refactor[venom]: introduce 
``IRContext`` and ``IRAnalysisCache`` (`#3983 `_) +* feat: add utility functions to ``OrderedSet`` (`#3833 `_) +* feat[venom]: optimize ``get_basic_block()`` (`#4002 `_) +* fix[venom]: fix branch eliminator cases in sccp (`#4003 `_) +* fix[codegen]: same symbol jumpdest merge (`#3982 `_) +* fix[venom]: fix eval of ``exp`` in sccp (`#4009 `_) +* refactor[venom]: remove unused method in ``make_ssa.py`` (`#4012 `_) +* fix[venom]: fix return opcode handling in mem2var (`#4011 `_) +* fix[venom]: fix ``cfg`` output format (`#4010 `_) +* chore[venom]: fix output formatting of data segment in ``IRContext`` (`#4016 `_) +* feat[venom]: optimize mem2var and store/variable elimination pass sequences (`#4032 `_) +* fix[venom]: fix some sccp evaluations (`#4028 `_) +* fix[venom]: add ``unique_symbols`` check to venom pipeline (`#4149 `_) +* feat[venom]: remove redundant store elimination pass (`#4036 `_) +* fix[venom]: remove some dead code in ``venom_to_assembly`` (`#4042 `_) +* feat[venom]: improve unused variable removal pass (`#4055 `_) +* fix[venom]: remove liveness requests (`#4058 `_) +* fix[venom]: fix list of volatile instructions (`#4065 `_) +* fix[venom]: remove dominator tree invalidation for store elimination pass (`#4069 `_) +* fix[venom]: move loop invariant assertion to entry block (`#4098 `_) +* fix[venom]: clear ``out_vars`` during calculation (`#4129 `_) +* fix[venom]: alloca for default arguments (`#4155 `_) +* Refactor ctx.add_instruction() and friends (`#3685 `_) +* fix: type annotation of helper function (`#3702 `_) +* feat[ir]: emit ``djump`` in dense selector table (`#3849 `_) +* chore: move venom tests to ``tests/unit/compiler`` (`#3684 `_) + +Other new features +------------------ +* feat[lang]: add ``blobhash()`` builtin (`#3962 `_) +* feat[lang]: support ``block.blobbasefee`` (`#3945 `_) +* feat[lang]: add ``revert_on_failure`` kwarg for create builtins (`#3844 `_) +* feat[lang]: allow downcasting of bytestrings (`#3832 `_) + +Docs +---- +* 
chore[docs]: add docs for v0.4.0 features (`#3947 `_) +* chore[docs]: ``implements`` does not check event declarations (`#4052 `_) +* docs: adopt a new theme: ``shibuya`` (`#3754 `_) +* chore[docs]: add evaluation order warning for builtins (`#4158 `_) +* Update ``FUNDING.yml`` (`#3636 `_) +* docs: fix nit in v0.3.10 release notes (`#3638 `_) +* docs: add note on ``pragma`` parsing (`#3640 `_) +* docs: retire security@vyperlang.org (`#3660 `_) +* feat[docs]: add more detail to modules docs (`#4087 `_) +* docs: update resources section (`#3656 `_) +* docs: add script to help working on the compiler (`#3674 `_) +* docs: add warnings at the top of all example token contracts (`#3676 `_) +* docs: typo in ``on_chain_market_maker.vy`` (`#3677 `_) +* docs: clarify ``address.codehash`` for empty account (`#3711 `_) +* docs: indexed arguments for events are limited (`#3715 `_) +* docs: Fix typos (`#3747 `_) +* docs: Upgrade dependencies and fixes (`#3745 `_) +* docs: add missing cli flags (`#3736 `_) +* chore: fix formatting and docs for new struct instantiation syntax (`#3792 `_) +* docs: floordiv (`#3797 `_) +* docs: add missing ``annotated_ast`` flag (`#3813 `_) +* docs: update logo in readme, remove competition reference (`#3837 `_) +* docs: add rationale for floordiv rounding behavior (`#3845 `_) +* chore[docs]: amend ``revert_on_failure`` kwarg docs for create builtins (`#3921 `_) +* fix[docs]: fix clipped ``endAuction`` method in example section (`#3969 `_) +* refactor[docs]: refactor security policy (`#3981 `_) +* fix: edit link to style guide (`#3658 `_) +* Add Vyper online compiler tooling (`#3680 `_) +* chore: fix typos (`#3749 `_) + +Bugfixes +-------- +* fix[codegen]: fix ``raw_log()`` when topics are non-literals (`#3977 `_) +* fix[codegen]: fix transient codegen for ``slice`` and ``extract32`` (`#3874 `_) +* fix[codegen]: bounds check for signed index accesses (`#3817 `_) +* fix: disallow ``value=`` passing for delegate and static raw_calls (`#3755 `_) +* 
fix[codegen]: fix double evals in sqrt, slice, blueprint (`#3976 `_) +* fix[codegen]: fix double eval in dynarray append/pop (`#4030 `_) +* fix[codegen]: fix double eval of start in range expr (`#4033 `_) +* fix[codegen]: overflow check in ``slice()`` (`#3818 `_) +* fix: concat buffer bug (`#3738 `_) +* fix[codegen]: fix ``make_setter`` overlap with internal calls (`#4037 `_) +* fix[codegen]: fix ``make_setter`` overlap in ``dynarray_append`` (`#4059 `_) +* fix[codegen]: ``make_setter`` overlap in the presence of ``staticcall`` (`#4128 `_) +* fix[codegen]: fix ``_abi_decode`` buffer overflow (`#3925 `_) +* fix[codegen]: zero-length dynarray ``abi_decode`` validation (`#4060 `_) +* fix[codegen]: recursive dynarray oob check (`#4091 `_) +* fix[codegen]: add back in ``returndatasize`` check (`#4144 `_) +* fix: block memory allocation overflow (`#3639 `_) +* fix[codegen]: panic on potential eval order issue for some builtins (`#4157 `_) +* fix[codegen]: panic on potential subscript eval order issue (`#4159 `_) +* add comptime check for uint2str input (`#3671 `_) +* fix: dead code analysis inside for loops (`#3731 `_) +* fix[ir]: fix a latent bug in ``sha3_64`` codegen (`#4063 `_) +* fix: ``opcodes`` and ``opcodes_runtime`` outputs (`#3735 `_) +* fix: bad assertion in expr.py (`#3758 `_) +* fix: iterator modification analysis (`#3764 `_) +* feat: allow constant interfaces (`#3718 `_) +* fix: assembly dead code eliminator (`#3791 `_) +* fix: prevent range over decimal (`#3798 `_) +* fix: mutability check for interface implements (`#3805 `_) +* fix[codegen]: fix non-memory reason strings (`#3877 `_) +* fix[ux]: fix compiler hang for large exponentiations (`#3893 `_) +* fix[lang]: allow type expressions inside pure functions (`#3906 `_) +* fix[ux]: raise ``VersionException`` with source info (`#3920 `_) +* fix[lang]: fix ``pow`` folding when args are not literals (`#3949 `_) +* fix[codegen]: fix some hardcoded references to ``STORAGE`` location (`#4015 `_) + +Patched 
security advisories (GHSAs) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Bounds check on built-in ``slice()`` function can be overflowed (`GHSA-9x7f-gwxq-6f2c `_) +* ``concat`` built-in can corrupt memory (`GHSA-2q8v-3gqq-4f8p `_) +* ``raw_call`` ``value=`` kwargs not disabled for static and delegate calls (`GHSA-x2c2-q32w-4w6m `_) +* negative array index bounds checks (`GHSA-52xq-j7v9-v4v2 `_) +* ``range(start, start + N)`` reverts for negative numbers (`GHSA-ppx5-q359-pvwj `_) +* incorrect topic logging in ``raw_log`` (`GHSA-xchq-w5r3-4wg3 `_) +* double eval of the ``slice`` start/length args in certain cases (`GHSA-r56x-j438-vw5m `_) +* multiple eval of ``sqrt()`` built in argument (`GHSA-5jrj-52x8-m64h `_) +* double eval of raw_args in ``create_from_blueprint`` (`GHSA-3whq-64q2-qfj6 `_) +* ``sha3`` codegen bug (`GHSA-6845-xw22-ffxv `_) +* ``extract32`` can read dirty memory (`GHSA-4hwq-4cpm-8vmx `_) +* ``_abi_decode`` Memory Overflow (`GHSA-9p8r-4xp4-gw5w `_) +* External calls can overflow return data to return input buffer (`GHSA-gp3w-2v2m-p686 `_) + +Tooling +------- +* feat[tool]: archive format (`#3891 `_) +* feat[tool]: add source map for constructors (`#4008 `_) +* feat: add short options ``-v`` and ``-O`` to the CLI (`#3695 `_) +* feat: Add ``bb`` and ``bb_runtime`` output options (`#3700 `_) +* fix: remove hex-ir from format cli options list (`#3657 `_) +* fix: pickleability of ``CompilerData`` (`#3803 `_) +* feat[tool]: validate AST nodes early in the pipeline (`#3809 `_) +* feat[tool]: delay global constraint check (`#3810 `_) +* feat[tool]: export variable read/write access (`#3790 `_) +* feat[tool]: improvements to AST annotation (`#3829 `_) +* feat[tool]: add ``node_id`` map to source map (`#3811 `_) +* chore[tool]: add help text for ``hex-ir`` CLI flag (`#3942 `_) +* refactor[tool]: refactor storage layout export (`#3789 `_) +* fix[tool]: fix cross-compilation issues, add windows CI (`#4014 `_) +* fix[tool]: star option in ``outputSelection`` (`#4094 
`_) + +Performance +----------- +* perf: lazy eval of f-strings in IRnode ctor (`#3602 `_) +* perf: levenshtein optimization (`#3780 `_) +* feat: frontend optimizations (`#3781 `_) +* feat: optimize ``VyperNode.deepcopy`` (`#3784 `_) +* feat: more frontend optimizations (`#3785 `_) +* perf: reimplement ``IRnode.__deepcopy__`` (`#3761 `_) + +Testing suite improvements +-------------------------- +* refactor[test]: bypass ``eth-tester`` and interface with evm backend directly (`#3846 `_) +* feat: Refactor assert_tx_failed into a context (`#3706 `_) +* feat[test]: implement ``abi_decode`` spec test (`#4095 `_) +* feat[test]: add more coverage to ``abi_decode`` fuzzer tests (`#4153 `_) +* feat[ci]: enable cancun testing (`#3861 `_) +* fix: add missing test for memory allocation overflow (`#3650 `_) +* chore: fix test for ``slice`` (`#3633 `_) +* add abi_types unit tests (`#3662 `_) +* refactor: test directory structure (`#3664 `_) +* chore: test all output formats (`#3683 `_) +* chore: deduplicate test files (`#3773 `_) +* feat[test]: add more transient storage tests (`#3883 `_) +* chore[ci]: fix apt-get failure in era pipeline (`#3821 `_) +* chore[ci]: enable python3.12 tests (`#3860 `_) +* chore[ci]: refactor jobs to use gh actions (`#3863 `_) +* chore[ci]: use ``--dist worksteal`` from latest ``xdist`` (`#3869 `_) +* chore: run mypy as part of lint rule in Makefile (`#3771 `_) +* chore[test]: always specify the evm backend (`#4006 `_) +* chore: update lint dependencies (`#3704 `_) +* chore: add color to mypy output (`#3793 `_) +* chore: remove tox rules for lint commands (`#3826 `_) +* chore[ci]: roll back GH actions/artifacts version (`#3838 `_) +* chore: Upgrade GitHub action dependencies (`#3807 `_) +* chore[ci]: pin eth-abi for decode regression (`#3834 `_) +* fix[ci]: release artifacts (`#3839 `_) +* chore[ci]: merge mypy job into lint (`#3840 `_) +* test: parametrize CI over EVM versions (`#3842 `_) +* feat[ci]: add PR title validation (`#3887 `_) +* 
fix[test]: fix failure in grammar fuzzing (`#3892 `_) +* feat[test]: add ``xfail_strict``, clean up ``setup.cfg`` (`#3889 `_) +* fix[ci]: pin hexbytes to pre-1.0.0 (`#3903 `_) +* chore[test]: update hexbytes version and tests (`#3904 `_) +* fix[test]: fix a bad bound in decimal fuzzing (`#3909 `_) +* fix[test]: fix a boundary case in decimal fuzzing (`#3918 `_) +* feat[ci]: update pypi release pipeline to use OIDC (`#3912 `_) +* chore[ci]: reconfigure single commit validation (`#3937 `_) +* chore[ci]: downgrade codecov action to v3 (`#3940 `_) +* feat[ci]: add codecov configuration (`#4057 `_) +* feat[test]: remove memory mocker (`#4005 `_) +* refactor[test]: change fixture scope in examples (`#3995 `_) +* fix[test]: fix call graph stability fuzzer (`#4064 `_) +* chore[test]: add macos to test matrix (`#4025 `_) +* refactor[test]: change default expected exception type (`#4004 `_) + +Misc / refactor +--------------- +* feat[ir]: add ``eval_once`` sanity fences to more builtins (`#3835 `_) +* fix: reorder compilation of branches in stmt.py (`#3603 `_) +* refactor[codegen]: make settings into a global object (`#3929 `_) +* chore: improve exception handling in IR generation (`#3705 `_) +* refactor: merge ``annotation.py`` and ``local.py`` (`#3456 `_) +* chore[ux]: remove deprecated python AST classes (`#3998 `_) +* refactor[ux]: remove deprecated ``VyperNode`` properties (`#3999 `_) +* feat: remove Index AST node (`#3757 `_) +* refactor: for loop target parsing (`#3724 `_) +* chore: improve diagnostics for invalid for loop annotation (`#3721 `_) +* refactor: builtin functions inherit from ``VyperType`` (`#3559 `_) +* fix: remove .keyword from Call AST node (`#3689 `_) +* improvement: assert descriptions in Crowdfund finalize() and participate() (`#3064 `_) +* feat: improve panics in IR generation (`#3708 `_) +* feat: improve warnings, refactor ``vyper_warn()`` (`#3800 `_) +* fix[ir]: unique symbol name (`#3848 `_) +* refactor: remove duplicate terminus checking code 
(`#3541 `_) +* refactor: ``ExprVisitor`` type validation (`#3739 `_) +* chore: improve exception for type validation (`#3759 `_) +* fix: fuzz test not updated to use TypeMismatch (`#3768 `_) +* chore: fix StringEnum._generate_next_value_ signature (`#3770 `_) +* chore: improve some error messages (`#3775 `_) +* refactor: ``get_search_paths()`` for vyper cli (`#3778 `_) +* chore: replace occurrences of 'enum' by 'flag' (`#3794 `_) +* chore: add another borrowship test (`#3802 `_) +* chore[ux]: improve an exports error message (`#3822 `_) +* chore: improve codegen test coverage report (`#3824 `_) +* chore: improve syntax error messages (`#3885 `_) +* chore[tool]: remove ``vyper-serve`` from ``setup.py`` (`#3936 `_) +* fix[ux]: replace standard strings with f-strings (`#3953 `_) +* chore[ir]: sanity check types in for range codegen (`#3968 `_) + v0.3.10 ("Black Adder") *********************** From f92ef414a551de34b64ae09ba2985ee240244c4c Mon Sep 17 00:00:00 2001 From: Benny Date: Wed, 26 Jun 2024 15:47:04 +1000 Subject: [PATCH 09/12] chore[docs]: add `FUNDING.json` for drips funding (#4167) Add json file to verify Vyper on https://www.drips.network/app/projects --- FUNDING.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 FUNDING.json diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 0000000000..301aa05572 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0x70CCBE10F980d80b7eBaab7D2E3A73e87D67B775" + } + } +} From 5067b86906f4a3815c4d7a2d3b64f2694ae3a520 Mon Sep 17 00:00:00 2001 From: Rim Rakhimov Date: Wed, 3 Jul 2024 20:30:07 +0400 Subject: [PATCH 10/12] chore[docs]: update `sourceMap` field descriptions (#4170) * Removed `evm.deployedBytecode.sourceMapFull`, as it does not work in v0.4.0 * Updated `evm.deployedBytecode.sourceMap` to be an object in compiler output * Added `evm.bytecode.sourceMap` into compiler input and compiler output --- docs/compiling-a-contract.rst | 32 
+++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/docs/compiling-a-contract.rst b/docs/compiling-a-contract.rst index 751af980b2..c2cd3ed22c 100644 --- a/docs/compiling-a-contract.rst +++ b/docs/compiling-a-contract.rst @@ -308,10 +308,10 @@ The following example describes the expected input format of ``vyper-json``. (Co // devdoc - Natspec developer documentation // evm.bytecode.object - Bytecode object // evm.bytecode.opcodes - Opcodes list + // evm.bytecode.sourceMap - Source mapping (useful for debugging) // evm.deployedBytecode.object - Deployed bytecode object // evm.deployedBytecode.opcodes - Deployed opcodes list - // evm.deployedBytecode.sourceMap - Solidity-style source mapping - // evm.deployedBytecode.sourceMapFull - Deployed source mapping (useful for debugging) + // evm.deployedBytecode.sourceMap - Deployed source mapping (useful for debugging) // evm.methodIdentifiers - The list of function hashes // // Using `evm`, `evm.bytecode`, etc. will select every target part of that output. @@ -388,15 +388,37 @@ The following example describes the output format of ``vyper-json``. Comments ar // The bytecode as a hex string. "object": "00fe", // Opcodes list (string) - "opcodes": "" + "opcodes": "", + // The source mapping. + "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The source mapping as a string. + "pc_pos_map_compressed": "" + } }, "deployedBytecode": { // The deployed bytecode as a hex string. "object": "00fe", // Deployed opcodes list (string) "opcodes": "", - // The deployed source mapping as a string. - "sourceMap": "" + // The deployed source mapping. 
+ "sourceMap": { + "breakpoints": [], + "error_map": {}, + "pc_ast_map": {}, + "pc_ast_map_item_keys": [], + "pc_breakpoints": [], + "pc_jump_map": {}, + "pc_pos_map": {}, + // The deployed source mapping as a string. + "pc_pos_map_compressed": "" + } }, // The list of function hashes "methodIdentifiers": { From 8931e54f8c577f6d60563ff47588c18e58a04c04 Mon Sep 17 00:00:00 2001 From: Zhipeng Xue <543984341@qq.com> Date: Fri, 26 Jul 2024 09:40:23 +0800 Subject: [PATCH 11/12] chore[test]: fix a type hint (#4173) Description Fix a type check warning reported by Pyre@Google, which was outdated after code modifications. Detail update the return type of function fix_terminal from bool to str, since it could be str after commit 176e7f7 --- tests/functional/grammar/test_grammar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/grammar/test_grammar.py b/tests/functional/grammar/test_grammar.py index de399e84b7..c1d2e1d6e6 100644 --- a/tests/functional/grammar/test_grammar.py +++ b/tests/functional/grammar/test_grammar.py @@ -37,7 +37,7 @@ def test_basic_grammar_empty(): assert len(tree.children) == 0 -def fix_terminal(terminal: str) -> bool: +def fix_terminal(terminal: str) -> str: # these throw exceptions in the grammar for bad in ("\x00", "\\ ", "\x0c"): terminal = terminal.replace(bad, " ") From fc192847932dcac83f0d1a0f8f8679867f525a1e Mon Sep 17 00:00:00 2001 From: HodanPlodky <36966616+HodanPlodky@users.noreply.github.com> Date: Fri, 26 Jul 2024 11:03:53 +0000 Subject: [PATCH 12/12] feat[venom]: offset instruction (#4180) this commit introduces an `offset` instruction that is emitted in the algebraic pass when the add instruction calculates an offset from a code label, which is used for immutables. this allows compilation directly to the magic `OFST` assembly instruction, which does additional constant folding after symbol resolution. 
--------- Co-authored-by: Charles Cooper --- .../venom/test_algebraic_optimizer.py | 51 +++++++++++++++++++ vyper/venom/passes/algebraic_optimization.py | 16 +++++- vyper/venom/passes/extract_literals.py | 2 +- vyper/venom/venom_to_assembly.py | 6 +++ 4 files changed, 73 insertions(+), 2 deletions(-) diff --git a/tests/unit/compiler/venom/test_algebraic_optimizer.py b/tests/unit/compiler/venom/test_algebraic_optimizer.py index e0368d4197..b5d55efbdc 100644 --- a/tests/unit/compiler/venom/test_algebraic_optimizer.py +++ b/tests/unit/compiler/venom/test_algebraic_optimizer.py @@ -127,3 +127,54 @@ def test_interleaved_case(interleave_point): assert bb.instructions[-1].operands[0] == op3_inv else: assert bb.instructions[-1].operands[0] == op3 + + +def test_offsets(): + ctx = IRContext() + fn = ctx.create_function("_global") + + bb = fn.get_basic_block() + + br1 = IRBasicBlock(IRLabel("then"), fn) + fn.append_basic_block(br1) + br2 = IRBasicBlock(IRLabel("else"), fn) + fn.append_basic_block(br2) + + p1 = bb.append_instruction("param") + op1 = bb.append_instruction("store", 32) + op2 = bb.append_instruction("add", 0, IRLabel("mem")) + op3 = bb.append_instruction("store", 64) + bb.append_instruction("dloadbytes", op1, op2, op3) + op5 = bb.append_instruction("mload", op3) + op6 = bb.append_instruction("iszero", op5) + bb.append_instruction("jnz", op6, br1.label, br2.label) + + op01 = br1.append_instruction("store", 32) + op02 = br1.append_instruction("add", 0, IRLabel("mem")) + op03 = br1.append_instruction("store", 64) + br1.append_instruction("dloadbytes", op01, op02, op03) + op05 = br1.append_instruction("mload", op03) + op06 = br1.append_instruction("iszero", op05) + br1.append_instruction("return", p1, op06) + + op11 = br2.append_instruction("store", 32) + op12 = br2.append_instruction("add", 0, IRLabel("mem")) + op13 = br2.append_instruction("store", 64) + br2.append_instruction("dloadbytes", op11, op12, op13) + op15 = br2.append_instruction("mload", op13) + op16 = 
br2.append_instruction("iszero", op15) + br2.append_instruction("return", p1, op16) + + ac = IRAnalysesCache(fn) + MakeSSA(ac, fn).run_pass() + AlgebraicOptimizationPass(ac, fn).run_pass() + RemoveUnusedVariablesPass(ac, fn).run_pass() + + offset_count = 0 + for bb in fn.get_basic_blocks(): + for instruction in bb.instructions: + assert instruction.opcode != "add" + if instruction.opcode == "offset": + offset_count += 1 + + assert offset_count == 3 diff --git a/vyper/venom/passes/algebraic_optimization.py b/vyper/venom/passes/algebraic_optimization.py index 4094219a6d..1d375ea988 100644 --- a/vyper/venom/passes/algebraic_optimization.py +++ b/vyper/venom/passes/algebraic_optimization.py @@ -1,6 +1,6 @@ from vyper.venom.analysis.dfg import DFGAnalysis from vyper.venom.analysis.liveness import LivenessAnalysis -from vyper.venom.basicblock import IRInstruction, IROperand +from vyper.venom.basicblock import IRInstruction, IRLabel, IRLiteral, IROperand from vyper.venom.passes.base_pass import IRPass @@ -58,10 +58,24 @@ def _get_iszero_chain(self, op: IROperand) -> list[IRInstruction]: chain.reverse() return chain + def _handle_offsets(self): + for bb in self.function.get_basic_blocks(): + for inst in bb.instructions: + # check if the instruction is of the form + # `add