From 21a78e3965f98ab9bf3cb75493e36d1469fe269c Mon Sep 17 00:00:00 2001 From: elijahhampton Date: Fri, 27 Sep 2024 15:28:13 -0400 Subject: [PATCH] Optimize EOF by reading types on demand Introduces `EOF1Header::get_type` function to read individual EOF code types. This is a fast procedure because we need to read at most 4 bytes at a known offset. By doing so we avoid allocating a separate vector for types. --- lib/evmone/eof.cpp | 112 ++++++++++++++++++------------------ lib/evmone/eof.hpp | 57 ++++++++++++------ lib/evmone/instructions.hpp | 10 ++-- test/unittests/eof_test.cpp | 3 +- 4 files changed, 101 insertions(+), 81 deletions(-) diff --git a/lib/evmone/eof.cpp b/lib/evmone/eof.cpp index 562a70d811..7f50280685 100644 --- a/lib/evmone/eof.cpp +++ b/lib/evmone/eof.cpp @@ -205,43 +205,43 @@ std::variant validate_section_headers(byt if (remaining_container_size < section_bodies_without_data) return EOFValidationError::invalid_section_bodies_size; - if (section_headers[TYPE_SECTION][0] != section_headers[CODE_SECTION].size() * 4) + if (section_headers[TYPE_SECTION][0] != + section_headers[CODE_SECTION].size() * EOF1Header::TYPE_ENTRY_SIZE) return EOFValidationError::invalid_type_section_size; return section_headers; } -std::variant, EOFValidationError> validate_types( - bytes_view container, size_t header_size, uint16_t type_section_size) noexcept +EOFValidationError validate_types( + bytes_view container, size_t type_section_offset, uint16_t type_section_size) noexcept { assert(!container.empty()); // guaranteed by EOF headers validation - std::vector types; // guaranteed by EOF headers validation - assert(header_size + type_section_size < container.size()); + assert(type_section_offset + type_section_size < container.size()); - for (auto offset = header_size; offset < header_size + type_section_size; offset += 4) + const auto num_types = type_section_size / EOF1Header::TYPE_ENTRY_SIZE; + for (size_t i = 0; i < num_types; ++i) { - types.emplace_back( - 
container[offset], container[offset + 1], read_uint16_be(&container[offset + 2])); - } + const auto offset = type_section_offset + (i * EOF1Header::TYPE_ENTRY_SIZE); + const auto inputs = container[offset]; + const auto outputs = container[offset + 1]; + const auto max_stack_height = read_uint16_be(&container[offset + 2]); - // check 1st section is (0, 0x80) - if (types[0].inputs != 0 || types[0].outputs != NON_RETURNING_FUNCTION) - return EOFValidationError::invalid_first_section_type; + // First type should be (0, 0x80) + if (i == 0 && (inputs != 0 || outputs != NON_RETURNING_FUNCTION)) + return EOFValidationError::invalid_first_section_type; - for (const auto& t : types) - { - if ((t.outputs > OUTPUTS_INPUTS_NUMBER_LIMIT && t.outputs != NON_RETURNING_FUNCTION) || - t.inputs > OUTPUTS_INPUTS_NUMBER_LIMIT) + if ((outputs > OUTPUTS_INPUTS_NUMBER_LIMIT && outputs != NON_RETURNING_FUNCTION) || + inputs > OUTPUTS_INPUTS_NUMBER_LIMIT) return EOFValidationError::inputs_outputs_num_above_limit; - if (t.max_stack_height > MAX_STACK_HEIGHT) + if (max_stack_height > MAX_STACK_HEIGHT) return EOFValidationError::max_stack_height_above_limit; } - return types; + return EOFValidationError::success; } /// Result of validating instructions in a code section. 
@@ -286,9 +286,11 @@ std::variant validate_instructi else if (op == OP_CALLF) { const auto fid = read_uint16_be(&code[i + 1]); - if (fid >= header.types.size()) + if (fid >= header.code_sizes.size()) return EOFValidationError::invalid_code_section_index; - if (header.types[fid].outputs == NON_RETURNING_FUNCTION) + + const auto type = header.get_type(container, fid); + if (type.outputs == NON_RETURNING_FUNCTION) return EOFValidationError::callf_to_non_returning_function; if (code_idx != fid) accessed_code_sections.insert(fid); @@ -302,10 +304,12 @@ std::variant validate_instructi else if (op == OP_JUMPF) { const auto fid = read_uint16_be(&code[i + 1]); - if (fid >= header.types.size()) + if (fid >= header.code_sizes.size()) return EOFValidationError::invalid_code_section_index; + + const auto type = header.get_type(container, fid); // JUMPF into returning function means current function is returning. - if (header.types[fid].outputs != NON_RETURNING_FUNCTION) + if (type.outputs != NON_RETURNING_FUNCTION) is_returning = true; if (code_idx != fid) accessed_code_sections.insert(fid); @@ -342,7 +346,8 @@ std::variant validate_instructi i += instr::traits[op].immediate_size; } - const auto declared_returning = (header.types[code_idx].outputs != NON_RETURNING_FUNCTION); + const auto declared_returning = + header.get_type(container, code_idx).outputs != NON_RETURNING_FUNCTION; if (is_returning != declared_returning) return EOFValidationError::invalid_non_returning_flag; @@ -410,7 +415,7 @@ bool validate_rjump_destinations(bytes_view code) noexcept /// Requires that the input is validated against truncation. std::variant validate_max_stack_height( - bytes_view code, size_t func_index, const std::vector& code_types) + bytes_view code, size_t func_index, const EOF1Header& header, bytes_view container) { // Special value used for detecting errors. static constexpr int32_t LOC_UNVISITED = -1; // Unvisited byte. 
@@ -427,8 +432,9 @@ std::variant validate_max_stack_height( assert(!code.empty()); + const auto type = header.get_type(container, func_index); std::vector stack_heights(code.size()); - stack_heights[0] = {code_types[func_index].inputs, code_types[func_index].inputs}; + stack_heights[0] = {type.inputs, type.inputs}; for (size_t i = 0; i < code.size();) { @@ -448,37 +454,36 @@ std::variant validate_max_stack_height( if (opcode == OP_CALLF) { const auto fid = read_uint16_be(&code[i + 1]); + const auto callee_type = header.get_type(container, fid); + stack_height_required = callee_type.inputs; - stack_height_required = code_types[fid].inputs; - - if (stack_height.max + code_types[fid].max_stack_height - stack_height_required > + if (stack_height.max + callee_type.max_stack_height - stack_height_required > STACK_SIZE_LIMIT) return EOFValidationError::stack_overflow; // Instruction validation ensures target function is returning - assert(code_types[fid].outputs != NON_RETURNING_FUNCTION); - stack_height_change = - static_cast(code_types[fid].outputs - stack_height_required); + assert(callee_type.outputs != NON_RETURNING_FUNCTION); + stack_height_change = static_cast(callee_type.outputs - stack_height_required); } else if (opcode == OP_JUMPF) { const auto fid = read_uint16_be(&code[i + 1]); + const auto callee_type = header.get_type(container, fid); - if (stack_height.max + code_types[fid].max_stack_height - code_types[fid].inputs > + if (stack_height.max + callee_type.max_stack_height - callee_type.inputs > STACK_SIZE_LIMIT) return EOFValidationError::stack_overflow; - if (code_types[fid].outputs == NON_RETURNING_FUNCTION) + if (callee_type.outputs == NON_RETURNING_FUNCTION) { - stack_height_required = code_types[fid].inputs; + stack_height_required = callee_type.inputs; } else { - if (code_types[func_index].outputs < code_types[fid].outputs) + if (type.outputs < callee_type.outputs) return EOFValidationError::jumpf_destination_incompatible_outputs; - 
stack_height_required = code_types[func_index].outputs + code_types[fid].inputs - - code_types[fid].outputs; + stack_height_required = type.outputs + callee_type.inputs - callee_type.outputs; // JUMPF to returning function requires exact number of stack items // and is allowed only in constant stack segment. @@ -488,7 +493,7 @@ std::variant validate_max_stack_height( } else if (opcode == OP_RETF) { - stack_height_required = code_types[func_index].outputs; + stack_height_required = type.outputs; // RETF allowed only in constant stack segment if (stack_height.max > stack_height_required) return EOFValidationError::stack_higher_than_outputs_required; @@ -665,10 +670,11 @@ EOFValidationError validate_eof1( // Validate stack auto msh_or_error = validate_max_stack_height( - header.get_code(container, code_idx), code_idx, header.types); + header.get_code(container, code_idx), code_idx, header, container); if (const auto* error = std::get_if(&msh_or_error)) return *error; - if (std::get(msh_or_error) != header.types[code_idx].max_stack_height) + if (std::get(msh_or_error) != + header.get_type(container, code_idx).max_stack_height) return EOFValidationError::invalid_max_stack_height; } @@ -759,14 +765,17 @@ std::variant validate_header( const auto header_size = eof_header_size(section_headers); - const auto types_or_error = - validate_types(container, header_size, section_headers[TYPE_SECTION].front()); - if (const auto* error = std::get_if(&types_or_error)) - return *error; - const auto& types = std::get>(types_or_error); + const auto type_section_offset = header_size; + const auto type_section_size = section_headers[TYPE_SECTION].front(); + + if (type_section_size != code_sizes.size() * EOF1Header::TYPE_ENTRY_SIZE) + return EOFValidationError::invalid_type_section_size; + + const auto validation_error = validate_types(container, type_section_offset, type_section_size); + if (validation_error != EOFValidationError::success) + return validation_error; std::vector 
code_offsets; - const auto type_section_size = section_headers[TYPE_SECTION][0]; auto offset = header_size + type_section_size; for (const auto code_size : code_sizes) @@ -790,13 +799,13 @@ std::variant validate_header( return EOF1Header{ .version = container[2], + .type_section_offset = type_section_offset, .code_sizes = code_sizes, .code_offsets = code_offsets, .data_size = data_size, .data_offset = data_offset, .container_sizes = container_sizes, .container_offsets = container_offsets, - .types = types, }; } @@ -829,15 +838,8 @@ EOF1Header read_valid_eof1_header(bytes_view container) const auto header_size = eof_header_size(section_headers); EOF1Header header; - header.version = container[2]; - - for (auto type_offset = header_size; - type_offset < header_size + section_headers[TYPE_SECTION][0]; type_offset += 4) - { - header.types.emplace_back(container[type_offset], container[type_offset + 1], - read_uint16_be(&container[type_offset + 2])); - } + header.type_section_offset = header_size; header.code_sizes = section_headers[CODE_SECTION]; auto code_offset = header_size + section_headers[TYPE_SECTION][0]; diff --git a/lib/evmone/eof.hpp b/lib/evmone/eof.hpp index 7fd6629177..b811690500 100644 --- a/lib/evmone/eof.hpp +++ b/lib/evmone/eof.hpp @@ -15,6 +15,24 @@ namespace evmone { +/// Loads big endian int16_t from data. Unsafe. +/// TODO: Move it to intx +inline int16_t read_int16_be(auto it) noexcept +{ + const uint8_t h = *it++; + const uint8_t l = *it; + return static_cast((h << 8) | l); +} + +/// Loads big endian uint16_t from data. Unsafe. +/// TODO: Move it to intx +inline uint16_t read_uint16_be(auto it) noexcept +{ + const uint8_t h = *it++; + const uint8_t l = *it; + return static_cast((h << 8) | l); +} + using evmc::bytes; using evmc::bytes_view; using namespace evmc::literals; @@ -40,9 +58,15 @@ struct EOFCodeType struct EOF1Header { + /// Size of a type entry in bytes. 
+ static constexpr size_t TYPE_ENTRY_SIZE = sizeof(EOFCodeType); + /// The EOF version, 0 means legacy code. uint8_t version = 0; + /// Offset of the type section start. + size_t type_section_offset = 0; + /// Size of every code section. std::vector code_sizes; @@ -62,7 +86,20 @@ struct EOF1Header /// Offset of every container section start; std::vector container_offsets; - std::vector types; + /// A helper to extract reference to a specific type section. + [[nodiscard]] EOFCodeType get_type(bytes_view container, size_t type_idx) const noexcept + { + const auto offset = type_section_offset + type_idx * TYPE_ENTRY_SIZE; + // TODO: Make EOFCodeType aggregate type and use designated initializers. + return EOFCodeType{ + container[offset], // inputs + container[offset + 1], // outputs + read_uint16_be(&container[offset + 2]) // max_stack_height + }; + } + + /// Returns the number of types in the type section. + [[nodiscard]] size_t get_type_count() const noexcept { return code_sizes.size(); } /// A helper to extract reference to a specific code section. [[nodiscard]] bytes_view get_code(bytes_view container, size_t code_idx) const noexcept @@ -182,22 +219,4 @@ enum class ContainerKind : uint8_t /// Output operator for EOFValidationError. EVMC_EXPORT std::ostream& operator<<(std::ostream& os, EOFValidationError err) noexcept; -/// Loads big endian int16_t from data. Unsafe. -/// TODO: Move it to intx -inline int16_t read_int16_be(auto it) noexcept -{ - const uint8_t h = *it++; - const uint8_t l = *it; - return static_cast((h << 8) | l); -} - -/// Loads big endian uint16_t from data. Unsafe. 
-/// TODO: Move it to intx -inline uint16_t read_uint16_be(auto it) noexcept -{ - const uint8_t h = *it++; - const uint8_t l = *it; - return static_cast((h << 8) | l); -} - } // namespace evmone diff --git a/lib/evmone/instructions.hpp b/lib/evmone/instructions.hpp index 6a3c66e321..23d706f8b9 100644 --- a/lib/evmone/instructions.hpp +++ b/lib/evmone/instructions.hpp @@ -1101,9 +1101,8 @@ inline code_iterator callf(StackTop stack, ExecutionState& state, code_iterator const auto index = read_uint16_be(&pos[1]); const auto& header = state.analysis.baseline->eof_header(); const auto stack_size = &stack.top() - state.stack_space.bottom(); - - const auto callee_required_stack_size = - header.types[index].max_stack_height - header.types[index].inputs; + const auto callee_type = header.get_type(state.original_code, index); + const auto callee_required_stack_size = callee_type.max_stack_height - callee_type.inputs; if (stack_size + callee_required_stack_size > StackSpace::limit) { state.status = EVMC_STACK_OVERFLOW; @@ -1134,9 +1133,8 @@ inline code_iterator jumpf(StackTop stack, ExecutionState& state, code_iterator const auto index = read_uint16_be(&pos[1]); const auto& header = state.analysis.baseline->eof_header(); const auto stack_size = &stack.top() - state.stack_space.bottom(); - - const auto callee_required_stack_size = - header.types[index].max_stack_height - header.types[index].inputs; + const auto callee_type = header.get_type(state.original_code, index); + const auto callee_required_stack_size = callee_type.max_stack_height - callee_type.inputs; if (stack_size + callee_required_stack_size > StackSpace::limit) { state.status = EVMC_STACK_OVERFLOW; diff --git a/test/unittests/eof_test.cpp b/test/unittests/eof_test.cpp index 1d03b8ddba..bdf4289df7 100644 --- a/test/unittests/eof_test.cpp +++ b/test/unittests/eof_test.cpp @@ -101,7 +101,8 @@ TEST(eof, read_valid_eof1_header) const auto header = read_valid_eof1_header(code); EXPECT_EQ(header.code_sizes, 
test_case.code_sizes) << test_case.code; EXPECT_EQ(header.data_size, test_case.data_size) << test_case.code; - EXPECT_EQ(header.types.size() * 4, test_case.types_size) << test_case.code; + EXPECT_EQ(header.get_type_count(), test_case.types_size / EOF1Header::TYPE_ENTRY_SIZE) + << test_case.code; EXPECT_EQ(header.container_sizes, test_case.container_sizes) << test_case.code; } }