ethereum · chfast · Jul 31, 2023 · Feb 28, 2023 · Jul 14, 2023 · Jul 15, 2023
diff --git a/include/evmmax/evmmax.hpp b/include/evmmax/evmmax.hpp
@@ -0,0 +1,54 @@
+// evmone: Fast Ethereum Virtual Machine implementation
+// Copyright 2023 The evmone Authors.
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include <intx/intx.hpp>
+
+namespace evmmax
+{
+
+/// The modular arithmetic operations for EVMMAX (EVM Modular Arithmetic Extensions).
+///
+/// Multiplication works on values in Montgomery form (x = aR % mod), where R = 2^num_bits
+/// of UintT. The member functions are only declared here; their definitions are provided
+/// by the library implementation file.
+template <typename UintT>
+class ModArith
+{
+public:
+    const UintT mod;  ///< The modulus.
+
+private:
+    const UintT m_r_squared;  ///< R² % mod, used to convert values to Montgomery form.
+
+    /// The modulus inversion, i.e. the 64-bit number N' such that mod⋅N' ≡ -1 (mod 2⁶⁴),
+    /// or equivalently (mod⋅N') % 2⁶⁴ = 2⁶⁴-1.
+    const uint64_t m_mod_inv;
+
+public:
+    /// Creates the arithmetic context for the given modulus and precomputes the constants
+    /// needed by the Montgomery multiplication.
+    explicit ModArith(const UintT& modulus) noexcept;
+
+    /// Converts a value to Montgomery form.
+    ///
+    /// This is done by using Montgomery multiplication mul(x, R²)
+    /// which gives xR²R⁻¹ % mod = xR % mod.
+    UintT to_mont(const UintT& x) const noexcept;
+
+    /// Converts a value in Montgomery form back to normal value.
+    ///
+    /// Given that x is in Montgomery form, x = aR, the conversion is done by using
+    /// Montgomery multiplication mul(x, 1) which gives aRR⁻¹ % mod = a % mod.
+    UintT from_mont(const UintT& x) const noexcept;
+
+    /// Performs a Montgomery modular multiplication.
+    ///
+    /// Inputs must be in Montgomery form: x = aR, y = bR.
+    /// This computes Montgomery multiplication xyR⁻¹ % mod which gives aRbRR⁻¹ % mod = abR % mod.
+    /// The result (abR) is in Montgomery form.
+    UintT mul(const UintT& x, const UintT& y) const noexcept;
+
+    /// Performs a modular addition. It is required that x < mod and y < mod.
+    /// The inputs may be, but are not required to be, in Montgomery form.
+    UintT add(const UintT& x, const UintT& y) const noexcept;
+
+    /// Performs a modular subtraction. It is required that x < mod and y < mod.
+    /// The inputs may be, but are not required to be, in Montgomery form.
+    UintT sub(const UintT& x, const UintT& y) const noexcept;
+};
+}  // namespace evmmax
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -1,5 +1,9 @@
 # evmone: Fast Ethereum Virtual Machine implementation
-# Copyright 2018-2019 The evmone Authors.
+# Copyright 2018 The evmone Authors.
 # SPDX-License-Identifier: Apache-2.0
 
+hunter_add_package(intx)
+find_package(intx CONFIG REQUIRED)
+
+add_subdirectory(evmmax)
 add_subdirectory(evmone)
diff --git a/lib/evmmax/CMakeLists.txt b/lib/evmmax/CMakeLists.txt
@@ -0,0 +1,14 @@
+# evmone: Fast Ethereum Virtual Machine implementation
+# Copyright 2023 The evmone Authors.
+# SPDX-License-Identifier: Apache-2.0
+
+add_library(evmmax STATIC)
+add_library(evmone::evmmax ALIAS evmmax)
+target_compile_features(evmmax PUBLIC cxx_std_20)
+target_include_directories(evmmax PUBLIC ${PROJECT_SOURCE_DIR}/include)
+# The public header evmmax.hpp includes <intx/intx.hpp>, so intx has to be a usage
+# requirement propagated to every consumer, not only a build dependency of this library.
+target_link_libraries(evmmax PUBLIC intx::intx)
+target_sources(
+    evmmax PRIVATE
+    ${PROJECT_SOURCE_DIR}/include/evmmax/evmmax.hpp
+    evmmax.cpp
+)
diff --git a/lib/evmmax/evmmax.cpp b/lib/evmmax/evmmax.cpp
@@ -0,0 +1,118 @@
+// evmone: Fast Ethereum Virtual Machine implementation
+// Copyright 2023 The evmone Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <evmmax/evmmax.hpp>
+
+using namespace intx;
+
+namespace evmmax
+{
+namespace
+{
+/// Computes the modulus inverse for Montgomery multiplication, i.e. the 64-bit word N'
+/// satisfying mod0⋅N' ≡ -1 (mod 2⁶⁴).
+///
+/// The loop evaluates (-mod0)^(2⁶⁴-1) in wrapping 64-bit arithmetic by multiplying together
+/// the successive squares (-mod0)^(2⁰), (-mod0)^(2¹), …, (-mod0)^(2⁶³). For an odd mod0 the
+/// value u = -mod0 is invertible modulo 2⁶⁴ and u^(2⁶⁴) ≡ 1, therefore u^(2⁶⁴-1) ≡ u⁻¹
+/// and the result is (-mod0)⁻¹ = -(mod0⁻¹). For an even mod0 no inverse exists and
+/// the returned value is not meaningful.
+///
+/// @param mod0  The least significant word of the modulus.
+inline constexpr uint64_t compute_mod_inv(uint64_t mod0) noexcept
+{
+    uint64_t inv = 1;
+    uint64_t square = 0 - mod0;  // Holds (-mod0)^(2ᵏ) at the start of the k-th step.
+    for (int steps_left = 64; steps_left != 0; --steps_left)
+    {
+        inv *= square;
+        square *= square;
+    }
+    return inv;
+}
+
+/// Computes R² % mod, where R = 2^num_bits for the given UintT.
+template <typename UintT>
+inline UintT compute_r_squared(const UintT& mod) noexcept
+{
+    // R² = 2^(2⋅num_bits) needs 2⋅num_bits + 1 bits to be represented;
+    // the width is rounded up to 2⋅num_bits + 64 to satisfy intx requirements.
+    constexpr auto r_squared_exp = UintT::num_bits * 2;
+    using WideUint = intx::uint<r_squared_exp + 64>;
+    static constexpr auto r_squared = WideUint{1} << r_squared_exp;
+    return intx::udivrem(r_squared, mod).rem;
+}
+
+/// Computes t + a⋅b + c as a 128-bit value and returns it as the pair {high word, low word}.
+///
+/// The sum always fits in 128 bits: (2⁶⁴-1)² + 2⋅(2⁶⁴-1) = 2¹²⁸-1.
+inline constexpr std::pair<uint64_t, uint64_t> addmul(
+    uint64_t t, uint64_t a, uint64_t b, uint64_t c) noexcept
+{
+    const auto product = umul(a, b);
+    const auto sum = product + t + c;
+    return {sum[1], sum[0]};
+}
+}  // namespace
+
+/// Stores the modulus and precomputes the Montgomery constants: R² % mod and the modulus
+/// inverse derived from the least significant word of the modulus.
+///
+/// NOTE(review): compute_mod_inv() only yields a valid inverse for an odd least significant
+/// word and compute_r_squared() divides by the modulus; presumably callers guarantee
+/// an odd, non-zero modulus — confirm.
+template <typename UintT>
+ModArith<UintT>::ModArith(const UintT& modulus) noexcept
+  : mod{modulus}, m_r_squared{compute_r_squared(modulus)}, m_mod_inv{compute_mod_inv(modulus[0])}
+{}
+
+/// Montgomery multiplication: interleaves the word-by-word product x⋅y with the reduction
+/// steps, each of which adds a multiple of mod and drops the lowest 64-bit word.
+template <typename UintT>
+UintT ModArith<UintT>::mul(const UintT& x, const UintT& y) const noexcept
+{
+    // Coarsely Integrated Operand Scanning (CIOS) Method
+    // Based on 2.3.2 from
+    // High-Speed Algorithms & Architectures For Number-Theoretic Cryptosystems
+    // https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
+
+    // Number of 64-bit words in the operands.
+    static constexpr auto S = UintT::num_words;
+
+    // Accumulator with one extra word on top of the operand width.
+    // NOTE(review): relies on the intx default constructor zero-initializing t — confirm.
+    intx::uint<UintT::num_bits + 64> t;
+    for (size_t i = 0; i != S; ++i)
+    {
+        // Multiplication step: t += x * y[i], word by word, with c as the running carry.
+        uint64_t c = 0;
+        for (size_t j = 0; j != S; ++j)
+            std::tie(c, t[j]) = addmul(t[j], x[j], y[i], c);
+        // Fold the last carry into the extra top word; d records the carry out of it.
+        auto tmp = addc(t[S], c);
+        t[S] = tmp.value;
+        auto d = tmp.carry;
+
+        // Reduction step: m is chosen so that the lowest word of t + m*mod is zero
+        // (for an odd modulus, mod[0] * m_mod_inv is -1 modulo 2⁶⁴).
+        c = 0;
+        auto m = t[0] * m_mod_inv;
+        // Only the carry of the lowest word matters here: the value stored to t[0]
+        // is overwritten by the first iteration of the loop below.
+        std::tie(c, t[0]) = addmul(t[0], m, mod[0], c);
+        // Add m*mod to the remaining words, storing each result one word lower,
+        // which shifts t right by 64 bits.
+        for (size_t j = 1; j != S; ++j)
+            std::tie(c, t[j - 1]) = addmul(t[j], m, mod[j], c);
+        // Shift the top word down as well, combining both carries into the new top word.
+        tmp = addc(t[S], c);
+        t[S - 1] = tmp.value;
+        t[S] = d + tmp.carry;  // TODO: Carry is 0 for sparse modulus.
+    }
+
+    // Final conditional subtraction bringing the result below mod.
+    if (t >= mod)  // TODO: cannot overflow if modulus is sparse (e.g. 255 bits).
+        t -= mod;
+
+    // Drop the extra top word.
+    return static_cast<UintT>(t);
+}
+
+/// Converts x to Montgomery form by Montgomery-multiplying it with the precomputed R² % mod.
+template <typename UintT>
+UintT ModArith<UintT>::to_mont(const UintT& x) const noexcept
+{
+    const auto& r_squared = m_r_squared;
+    return mul(x, r_squared);
+}
+
+/// Converts x out of Montgomery form by Montgomery-multiplying it with the plain value 1.
+template <typename UintT>
+UintT ModArith<UintT>::from_mont(const UintT& x) const noexcept
+{
+    const UintT one = 1;
+    return mul(x, one);
+}
+
+/// Computes (x + y) % mod for x < mod and y < mod.
+template <typename UintT>
+UintT ModArith<UintT>::add(const UintT& x, const UintT& y) const noexcept
+{
+    // TODO: the addition cannot overflow if modulus is sparse (e.g. 255 bits).
+    const auto sum = addc(x, y);
+    const auto reduced = subc(sum.value, mod);
+
+    // The reduced value is the result when the addition overflowed UintT
+    // or when the sum is not smaller than mod (the subtraction did not borrow).
+    const bool use_reduced = sum.carry || !reduced.carry;
+    return use_reduced ? reduced.value : sum.value;
+}
+
+/// Computes (x - y) % mod for x < mod and y < mod.
+template <typename UintT>
+UintT ModArith<UintT>::sub(const UintT& x, const UintT& y) const noexcept
+{
+    const auto d = subc(x, y);
+    // A borrow means x < y and d.value holds the wrapped difference x - y + 2^num_bits;
+    // adding mod (with wrap-around) then gives x - y + mod, which is in range [0, mod).
+    const auto s = d.value + mod;
+    return d.carry ? s : d.value;
+}
+
+// Explicit instantiations: the member functions are defined in this file only,
+// so these are the integer widths available to other translation units.
+template class ModArith<uint256>;
+template class ModArith<uint384>;
+}  // namespace evmmax
diff --git a/lib/evmone/CMakeLists.txt b/lib/evmone/CMakeLists.txt
@@ -4,9 +4,6 @@
 
 include(LibraryTools)
 
-hunter_add_package(intx)
-find_package(intx CONFIG REQUIRED)
-
 add_library(evmone
     ${include_dir}/evmone/evmone.h
     advanced_analysis.cpp

diff --git a/test/internal_benchmarks/CMakeLists.txt b/test/internal_benchmarks/CMakeLists.txt
@@ -4,8 +4,9 @@
 
 add_executable(
     evmone-bench-internal
+    evmmax_bench.cpp
     find_jumpdest_bench.cpp
     memory_allocation.cpp
 )
 
-target_link_libraries(evmone-bench-internal PRIVATE benchmark::benchmark)
+target_link_libraries(evmone-bench-internal PRIVATE evmone::evmmax benchmark::benchmark)
diff --git a/test/internal_benchmarks/evmmax_bench.cpp b/test/internal_benchmarks/evmmax_bench.cpp
@@ -0,0 +1,63 @@
+// evmone: Fast Ethereum Virtual Machine implementation
+// Copyright 2023 The evmone Authors.
+// SPDX-License-Identifier: Apache-2.0
+
+#include <benchmark/benchmark.h>
+#include <evmmax/evmmax.hpp>
+
+using namespace intx;
+
+namespace
+{
+// Moduli used as benchmark inputs.
+// NOTE(review): presumably the base field prime of the BN254 (alt_bn128) curve — confirm.
+constexpr auto bn254 = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47_u256;
+// Equals 2²⁵⁶ - 2³² - 977; presumably the base field prime of the secp256k1 curve — confirm.
+constexpr auto secp256k1 = 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f_u256;
+
+/// Benchmarks ModArith::add() with a chain of dependent additions, two per batch.
+///
+/// Both starting values (Mod / 2 and Mod / 3) are below the modulus as add() requires.
+template <typename UintT, const UintT& Mod>
+void evmmax_add(benchmark::State& state)
+{
+    const evmmax::ModArith<UintT> m{Mod};
+    auto a = Mod / 2;
+    auto b = Mod / 3;
+
+    while (state.KeepRunningBatch(2))
+    {
+        a = m.add(a, b);
+        b = m.add(b, a);
+    }
+
+    // Make the final values observable so the measured chain cannot be optimized away
+    // (e.g. when the library gets inlined by LTO).
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(b);
+}
+
+/// Benchmarks ModArith::sub() with a chain of dependent subtractions, two per batch.
+///
+/// Both starting values (Mod / 2 and Mod / 3) are below the modulus as sub() requires.
+template <typename UintT, const UintT& Mod>
+void evmmax_sub(benchmark::State& state)
+{
+    const evmmax::ModArith<UintT> m{Mod};
+    auto a = Mod / 2;
+    auto b = Mod / 3;
+
+    while (state.KeepRunningBatch(2))
+    {
+        a = m.sub(a, b);
+        b = m.sub(b, a);
+    }
+
+    // Make the final values observable so the measured chain cannot be optimized away
+    // (e.g. when the library gets inlined by LTO).
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(b);
+}
+
+/// Benchmarks ModArith::mul() with a chain of dependent Montgomery multiplications,
+/// two per batch. The starting values are converted to Montgomery form first.
+template <typename UintT, const UintT& Mod>
+void evmmax_mul(benchmark::State& state)
+{
+    const evmmax::ModArith<UintT> m{Mod};
+    auto a = m.to_mont(Mod / 2);
+    auto b = m.to_mont(Mod / 3);
+
+    while (state.KeepRunningBatch(2))
+    {
+        a = m.mul(a, b);
+        b = m.mul(b, a);
+    }
+
+    // Make the final values observable so the measured chain cannot be optimized away
+    // (e.g. when the library gets inlined by LTO).
+    benchmark::DoNotOptimize(a);
+    benchmark::DoNotOptimize(b);
+}
+}  // namespace
+
+// Register every operation benchmark for both 256-bit moduli.
+BENCHMARK_TEMPLATE(evmmax_add, uint256, bn254);
+BENCHMARK_TEMPLATE(evmmax_add, uint256, secp256k1);
+BENCHMARK_TEMPLATE(evmmax_sub, uint256, bn254);
+BENCHMARK_TEMPLATE(evmmax_sub, uint256, secp256k1);
+BENCHMARK_TEMPLATE(evmmax_mul, uint256, bn254);
+BENCHMARK_TEMPLATE(evmmax_mul, uint256, secp256k1);
diff --git a/test/unittests/CMakeLists.txt b/test/unittests/CMakeLists.txt
@@ -29,6 +29,7 @@ target_sources(
     evm_storage_test.cpp
     evm_other_test.cpp
     evm_benchmark_test.cpp
+    evmmax_test.cpp
     evmone_test.cpp
     execution_state_test.cpp
     instructions_test.cpp
@@ -48,7 +49,7 @@ target_sources(
     statetest_logs_hash_test.cpp
     tracing_test.cpp
 )
-target_link_libraries(evmone-unittests PRIVATE evmone evmone::state evmone::statetestutils testutils evmc::instructions GTest::gtest GTest::gtest_main)
+target_link_libraries(evmone-unittests PRIVATE evmone evmone::evmmax evmone::state evmone::statetestutils testutils evmc::instructions GTest::gtest GTest::gtest_main)
 target_include_directories(evmone-unittests PRIVATE ${evmone_private_include_dir})
 
 gtest_discover_tests(evmone-unittests TEST_PREFIX ${PROJECT_NAME}/unittests/)