From 5425693f5b4351748e7c66d738b3800d64d0d7d8 Mon Sep 17 00:00:00 2001 From: Zachary James Williamson Date: Wed, 17 May 2023 19:47:42 +0200 Subject: [PATCH] Zw/noir recursion 2 (#414) * removed redundant `reduce` operations after negating biggroup elements simplified hash input structure when hashing transcripts cached partial non native field multiplications reverted how native transcript computes hash buffers pedersen_plookup can be configured to skip the hash_single range check under limited conditions fixed the range check in pedersen_plookup::hash_single pedersen_plookup::hash_single now validates the low and high scalar slice values match the original scalar bigfield::operator- now correctly uses the UltraPlonk code path if able to added biggroup::multiple_montgomery_ladder to reduce required field multiplications added biggroup::quadruple_and_add to reduce required field multiplications biggroup_nafs now directly calls the Composer range constraint methods to avoid creating redundant arithmetic gates when using the PlookupComposer biggroup plookup ROM tables now track the maximum size of any field element recovered from the table (i.e. the maximum of the input maximum sizes) biggroup batch tables prefer to create size-6 lookup tables if doing so reduces the number of individual tables required for a given MSM recursion::transcript no longer performs redundant range constraints when adding buffer elements recursion::transcript correctly checks that, when slicing field elements, the slice values are correct over the integers (i.e. slice_sum != original + p) recursion::verification_key now optimally packs key data into minimum required number of field elements before hashing recursion::verifier proof and key data is now correctly extracted from the transcript/key instead of being generated directly as witnesses. 
cleaned up code + comments code tidy, added more comments cleaned up how aggregation object handles public inputs native verification_key::compress matches circuit output fixed compile errors + failing tests compiler error join_split.test.cpp passing Note: not changing any upstream .js verification keys. I don't think we need to as bberg is now decoupled from aztec connect * compiler fix * more compiler fix * attempt to fix .js and .sol tests * revert keccak transcript to original functionality * added hash_index back into verification_key::compress fixed composer bug where `decompose_into_default_range` was sometimes not range-constraining last limb removed commented-out code added more descriptive comments to PedersenPreimageBuilder * changed join-split vkey * temporarily point to branch of aztec that updates aggregation state usage until fix is in aztec master * revert .aztec-packages-commit * header brittleness fix * compiler fix * compiler fix w. aggregation object * reverting changes to `assign_object_to_proof_outputs` to preserve backwards-compatibility with a3-packages * more backwards compatibility fixes * wip --------- Co-authored-by: dbanks12 Co-authored-by: David Banks <47112877+dbanks12@users.noreply.github.com> --- .../convert_buffer_to_field.hpp | 10 + .../crypto/pedersen_commitment/pedersen.cpp | 8 +- .../crypto/pedersen_commitment/pedersen.hpp | 2 +- .../pedersen_commitment/pedersen_lookup.cpp | 19 +- .../pedersen_commitment/pedersen_lookup.hpp | 4 +- .../pedersen_lookup.test.cpp | 20 +- .../honk/composer/ultra_honk_composer.hpp | 5 +- .../composer/ultra_honk_composer.test.cpp | 3 +- .../barretenberg/honk/proof_system/prover.hpp | 3 +- .../honk/proof_system/ultra_prover.hpp | 3 +- .../proofs/join_split/join_split.test.cpp | 5 +- .../splitting_tmp/ultra_plonk_composer.hpp | 7 +- .../ultra_plonk_composer.test.cpp | 2 +- .../plonk/composer/ultra_composer.cpp | 259 ++++++----- .../plonk/composer/ultra_composer.hpp | 61 +-- 
.../plonk/composer/ultra_composer.test.cpp | 2 +- .../verification_key/verification_key.cpp | 57 +-- .../ultra_circuit_constructor.cpp | 219 +++++----- .../ultra_circuit_constructor.hpp | 119 +++-- .../ultra_circuit_constructor.test.cpp | 2 +- .../circuits/recursive_circuit.hpp | 2 +- .../commitment/pedersen/pedersen_plookup.cpp | 82 +++- .../commitment/pedersen/pedersen_plookup.hpp | 11 +- .../stdlib/hash/pedersen/pedersen_plookup.cpp | 43 +- .../stdlib/hash/pedersen/pedersen_plookup.hpp | 2 +- .../primitives/bigfield/bigfield_impl.hpp | 22 +- .../stdlib/primitives/biggroup/biggroup.hpp | 241 ++++------ .../primitives/biggroup/biggroup.test.cpp | 30 +- .../biggroup/biggroup_batch_mul.hpp | 11 +- .../primitives/biggroup/biggroup_bn254.hpp | 80 ++-- .../primitives/biggroup/biggroup_impl.hpp | 412 ++++++++++-------- .../primitives/biggroup/biggroup_nafs.hpp | 43 +- .../biggroup/biggroup_secp256k1.hpp | 13 +- .../primitives/biggroup/biggroup_tables.hpp | 272 +++++++----- .../aggregation_state/aggregation_state.hpp | 38 +- .../recursion/transcript/transcript.hpp | 253 +++++------ .../verification_key/verification_key.hpp | 301 +++++++++---- .../stdlib/recursion/verifier/verifier.hpp | 58 +-- .../recursion/verifier/verifier.test.cpp | 57 ++- .../verifier/verifier_turbo.test.cpp | 2 +- .../barretenberg/transcript/transcript.cpp | 18 +- .../keys/RecursiveUltraVerificationKey.sol | 104 ++--- 42 files changed, 1611 insertions(+), 1294 deletions(-) diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp index 10adacf4ce6..9b657280adc 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp @@ -5,6 +5,16 @@ namespace crypto { namespace pedersen_commitment { +/** + * @brief Converts input uint8_t buffers into vector of field elements. 
Used to hash the Transcript in a SNARK-friendly + * manner for recursive circuits. + * + * `buffer` is an unstructured byte array we want to convert these into field elements + * prior to hashing. We do this by splitting buffer into 31-byte chunks. + * + * @param buffer + * @return std::vector + */ inline std::vector convert_buffer_to_field(const std::vector& input) { const size_t num_bytes = input.size(); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp index 639dfa2440d..8e3aa99ef15 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.cpp @@ -105,16 +105,16 @@ grumpkin::fq compress_native(const std::vector& input) +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index) { const auto elements = convert_buffer_to_field(input); - grumpkin::fq result_fq = compress_native(elements); + grumpkin::fq result_fq = compress_native(elements, hash_index); return result_fq; } -grumpkin::fq compress_native(const std::vector& input) +grumpkin::fq compress_native(const std::vector& input, const size_t hash_index) { - return compress_native_buffer_to_field(input); + return compress_native_buffer_to_field(input, hash_index); } } // namespace pedersen_commitment diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp index d7275aa6ac7..0600e13b529 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen.hpp @@ -22,7 +22,7 @@ template grumpkin::fq compress_native(const std::array& input); +grumpkin::fq compress_native(const std::vector& input, const size_t hash_index = 0); grumpkin::fq compress_native(const std::vector>& input_pairs); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp 
b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp index e2333acfa6e..5e6288e8dfa 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.cpp @@ -28,11 +28,12 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i const size_t num_inputs = inputs.size(); grumpkin::fq result = (pedersen_iv_table[iv]).x; - for (size_t i = 0; i < num_inputs; i++) { + result = hash_pair(result, num_inputs); + for (size_t i = 0; i < num_inputs - 1; i++) { result = hash_pair(result, inputs[i]); } - return (hash_single(result, false) + hash_single(grumpkin::fq(num_inputs), true)); + return (hash_single(result, false) + hash_single(inputs[num_inputs - 1], true)); } grumpkin::g1::element merkle_damgard_compress(const std::vector& inputs, const std::vector& ivs) @@ -46,7 +47,8 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i const size_t num_inputs = inputs.size(); grumpkin::fq result = (pedersen_iv_table[0]).x; - for (size_t i = 0; i < 2 * num_inputs; i++) { + result = hash_pair(result, num_inputs); + for (size_t i = 0; i < 2 * num_inputs - 1; i++) { if ((i & 1) == 0) { grumpkin::fq iv_result = (pedersen_iv_table[ivs[i >> 1]]).x; result = hash_pair(result, iv_result); @@ -54,8 +56,7 @@ grumpkin::g1::element merkle_damgard_compress(const std::vector& i result = hash_pair(result, inputs[i >> 1]); } } - - return (hash_single(result, false) + hash_single(grumpkin::fq(num_inputs), true)); + return (hash_single(result, false) + hash_single(inputs[num_inputs - 1], true)); } grumpkin::g1::element merkle_damgard_tree_compress(const std::vector& inputs, @@ -111,16 +112,16 @@ grumpkin::fq compress_native(const std::vector& inputs, const std: return commit_native(inputs, hash_indices).x; } -grumpkin::fq compress_native_buffer_to_field(const std::vector& input) +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index) 
{ const auto elements = convert_buffer_to_field(input); - grumpkin::fq result_fq = compress_native(elements); + grumpkin::fq result_fq = compress_native(elements, hash_index); return result_fq; } -std::vector compress_native(const std::vector& input) +std::vector compress_native(const std::vector& input, const size_t hash_index) { - const auto result_fq = compress_native_buffer_to_field(input); + const auto result_fq = compress_native_buffer_to_field(input, hash_index); uint256_t result_u256(result_fq); const size_t num_bytes = input.size(); diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp index 0f99b13fbbd..b77fac9688d 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.hpp @@ -13,9 +13,9 @@ grumpkin::g1::element merkle_damgard_tree_compress(const std::vector& inputs, const size_t hash_index = 0); grumpkin::fq compress_native(const std::vector& inputs, const std::vector& hash_indices); -std::vector compress_native(const std::vector& input); +std::vector compress_native(const std::vector& input, const size_t hash_index = 0); -grumpkin::fq compress_native_buffer_to_field(const std::vector& input); +grumpkin::fq compress_native_buffer_to_field(const std::vector& input, const size_t hash_index = 0); template grumpkin::fq compress_native(const std::array& inputs) { diff --git a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp index 82d0b4f7ed0..a06f5cea588 100644 --- a/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp +++ b/cpp/src/barretenberg/crypto/pedersen_commitment/pedersen_lookup.test.cpp @@ -157,7 +157,9 @@ TEST(pedersen_lookup, merkle_damgard_compress) const auto result = crypto::pedersen_commitment::lookup::merkle_damgard_compress(inputs, 
iv); - fq intermediate = (grumpkin::g1::affine_one * fr(iv + 1)).x; + auto iv_hash = compute_expected((grumpkin::g1::affine_one * fr(iv + 1)).x, 0); + auto length = compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2)); + fq intermediate = affine_element(iv_hash + length).x; for (size_t i = 0; i < m; i++) { intermediate = affine_element(compute_expected(intermediate, 0) + @@ -165,10 +167,7 @@ TEST(pedersen_lookup, merkle_damgard_compress) .x; } - EXPECT_EQ(affine_element(result).x, - affine_element(compute_expected(intermediate, 0) + - compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2))) - .x); + EXPECT_EQ(affine_element(result).x, intermediate); } TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) @@ -188,7 +187,11 @@ TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) const auto result = crypto::pedersen_commitment::lookup::merkle_damgard_compress(inputs, ivs); const size_t initial_iv = 0; - fq intermediate = (grumpkin::g1::affine_one * fr(initial_iv + 1)).x; + auto iv_hash = compute_expected((grumpkin::g1::affine_one * fr(initial_iv + 1)).x, 0); + + auto length = compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2)); + fq intermediate = affine_element(iv_hash + length).x; + for (size_t i = 0; i < 2 * m; i++) { if ((i & 1) == 0) { const auto iv = (grumpkin::g1::affine_one * fr(ivs[i >> 1] + 1)).x; @@ -204,10 +207,7 @@ TEST(pedersen_lookup, merkle_damgard_compress_multiple_iv) } } - EXPECT_EQ(affine_element(result).x, - affine_element(compute_expected(intermediate, 0) + - compute_expected(fq(m), (crypto::pedersen_hash::lookup::NUM_PEDERSEN_TABLES / 2))) - .x); + EXPECT_EQ(affine_element(result).x, intermediate); } TEST(pedersen_lookup, merkle_damgard_tree_compress) diff --git a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp index e17087aa887..1073ec7825c 100644 --- 
a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp +++ b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp @@ -371,11 +371,12 @@ class UltraHonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const UltraCircuitConstructor::non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true) { - return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); + return circuit_constructor.evaluate_non_native_field_multiplication(input, + range_constrain_quotient_and_remainder); }; // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple concept StandardFlavor = IsAnyOf; +template +concept StandardFlavor = IsAnyOf; template class StandardProver_ { diff --git a/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp b/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp index bcf665711c0..9bbe7314f2d 100644 --- a/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp +++ b/cpp/src/barretenberg/honk/proof_system/ultra_prover.hpp @@ -13,7 +13,8 @@ namespace proof_system::honk { // We won't compile this class with honk::flavor::Standard, but we will like want to compile it (at least for testing) // with a flavor that uses the curve Grumpkin, or a flavor that does/does not have zk, etc. 
-template concept UltraFlavor = IsAnyOf; +template +concept UltraFlavor = IsAnyOf; template class UltraProver_ { using FF = typename Flavor::FF; diff --git a/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp b/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp index aa7e516609e..2b81096c420 100644 --- a/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp +++ b/cpp/src/barretenberg/join_split_example/proofs/join_split/join_split.test.cpp @@ -806,11 +806,12 @@ TEST_F(join_split_tests, test_0_input_notes_and_detect_circuit_change) // The below part detects any changes in the join-split circuit - constexpr uint32_t CIRCUIT_GATE_COUNT = 185573; + constexpr uint32_t CIRCUIT_GATE_COUNT = 183834; constexpr uint32_t GATES_NEXT_POWER_OF_TWO = 524288; - const uint256_t VK_HASH("13eb88883e80efb9bf306af2962cd1a49e9fa1b0bfb2d4b563b95217a17bcc74"); + const uint256_t VK_HASH("5c2e0fe914dbbf23d6bac6ae4db9a7e43d98c0b9d71c9200208dbce24a815c6e"); auto number_of_gates_js = result.number_of_gates; + std::cout << get_verification_key()->sha256_hash() << std::endl; auto vk_hash_js = get_verification_key()->sha256_hash(); if (!CIRCUIT_CHANGE_EXPECTED) { diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp index 53c8ff8f903..703e037e641 100644 --- a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp +++ b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp @@ -380,13 +380,14 @@ class UltraPlonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const UltraCircuitConstructor::non_native_field_witnesses& input, const bool 
range_constrain_quotient_and_remainder = true) { - return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); + return circuit_constructor.evaluate_non_native_field_multiplication(input, + range_constrain_quotient_and_remainder); }; - // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& + // std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( // add_simple limb0, diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp index a5f6e09822d..fe95dfd298a 100644 --- a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp +++ b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp @@ -781,7 +781,7 @@ TEST(ultra_plonk_composer_splitting_tmp, non_native_field_multiplication) UltraCircuitConstructor::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); auto prover = composer.create_prover(); diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp index 1f3e9fb3ed3..ce0f4eac7b8 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp @@ -1138,7 +1138,30 @@ std::vector UltraComposer::decompose_into_default_range(const uint32_t const auto limb_idx = add_variable(sublimbs[i]); 
sublimb_indices.emplace_back(limb_idx); if ((i == sublimbs.size() - 1) && has_remainder_bits) { - create_new_range_constraint(limb_idx, last_limb_range); + if ((target_range_bitnum - last_limb_size) < DEFAULT_PLOOKUP_RANGE_CUTOFF_BITNUM) { + // we don't want to make a new range table. + // X = limb, L = last limb range, K = sublimb mask. L < X + // we want X <= L + // i.e. L - X >= 0 and L - X <= K + // equivalent to saying L - X <= K + // D = L - X + // D + X - L + barretenberg::fr diff = uint256_t(last_limb_range) - get_variable(limb_idx); + uint32_t diff_idx = add_variable(diff); + create_add_gate({ + .a = limb_idx, + .b = zero_idx, + .c = diff_idx, + .a_scaling = 1, + .b_scaling = 0, + .c_scaling = 1, + .const_scaling = -barretenberg::fr(last_limb_range), + }); + create_new_range_constraint(diff_idx, sublimb_mask); + create_new_range_constraint(limb_idx, sublimb_mask); + } else { + create_new_range_constraint(limb_idx, last_limb_range); + } } else { create_new_range_constraint(limb_idx, sublimb_mask); } @@ -1860,22 +1883,18 @@ std::array UltraComposer::decompose_non_native_field_double_width_l } /** - * @brief Queue up non-native field multiplication data. + * @brief Process a non-native field multiplication data. * - * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, + * @details The data represents a non-native field multiplication identity a * b = q * p + r, * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. * - * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would - * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove - * duplicates in the circuit finishing stage of the proving key computation. 
- * * The non-native field modulus, p, is a circuit constant * * The return value are the witness indices of the two remainder limbs `lo_1, hi_2` * * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraComposer::queue_non_native_field_multiplication( +std::array UltraComposer::evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1903,10 +1922,11 @@ std::array UltraComposer::queue_non_native_field_multiplication( get_variable(input.r[2]), get_variable(input.r[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1955,68 +1975,114 @@ std::array UltraComposer::queue_non_native_field_multiplication( range_constrain_two_limbs(input.q[2], input.q[3]); } - // Add witnesses into the multiplication cache - // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) - cached_non_native_field_multiplication cache_entry{ - .a = input.a, - .b = input.b, - .q = input.q, - .r = input.r, - .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, - .neg_modulus = input.neg_modulus, - }; - cached_non_native_field_multiplications.emplace_back(cache_entry); + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + 
input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); return std::array{ lo_1_idx, hi_3_idx }; } /** * @brief Called in `compute_proving_key` when finalizing circuit. 
- * Iterates over the cached_non_native_field_multiplication objects, + * Iterates over the cached_partial_non_native_field_multiplication objects, * removes duplicates, and instantiates the remainder as constraints` */ void UltraComposer::process_non_native_field_multiplications() { - std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - - auto last = - std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); + for (size_t i = 0; i < cached_partial_non_native_field_multiplications.size(); ++i) { + auto& c = cached_partial_non_native_field_multiplications[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } + std::sort(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - auto it = cached_non_native_field_multiplications.begin(); + auto last = std::unique(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT_2 = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + auto it = cached_partial_non_native_field_multiplications.begin(); // iterate over the cached items and create constraints while (it != last) { const auto input = *it; - const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; - const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; - const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; - const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; - const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; - const uint32_t hi_3_idx = 
input.cross_terms.hi_3_idx; - - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.lo_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); ++num_gates; w_l.emplace_back(input.a[0]); @@ -2027,65 +2093,34 @@ void UltraComposer::process_non_native_field_multiplications() ++num_gates; w_l.emplace_back(input.a[2]); w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); ++num_gates; w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_1); apply_aux_selectors(AUX_SELECTORS::NONE); ++num_gates; - - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); - - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); ++it; } } /** - * Compute the limb-multiplication part of a non native field mul + 
* @brief Queue the limb-multiplication part of a non native field mul * * i.e. compute the low 204 and high 204 bit components of `a * b` where `a, b` are nnf elements composed of 4 * limbs with size DEFAULT_NON_NATIVE_FIELD_LIMB_BITS * + * @details The data queued represents part of a non-native field multiplication identity a * b = q * p + r, + * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. + * + * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove + * duplicates in the circuit finishing stage of the proving key computation. **/ -std::array UltraComposer::evaluate_partial_non_native_field_multiplication( +std::array UltraComposer::queue_partial_non_native_field_multiplication( const non_native_field_witnesses& input) { @@ -2113,30 +2148,16 @@ std::array UltraComposer::evaluate_partial_non_native_field_multipl const uint32_t hi_0_idx = add_variable(hi_0); const uint32_t hi_1_idx = add_variable(hi_1); - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; + // Add witnesses into the multiplication cache + // 
(when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_partial_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .lo_0 = lo_0_idx, + .hi_0 = hi_0_idx, + .hi_1 = hi_1_idx, + }; + cached_partial_non_native_field_multiplications.emplace_back(cache_entry); return std::array{ lo_0_idx, hi_1_idx }; } diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp index 6f22b40bb00..eb0e5b0e92b 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp @@ -29,6 +29,11 @@ class UltraComposer : public ComposerBase { // large ranges such as 2^64. For such ranges the element will be decomposed into smaller // chuncks according to the parameter below static constexpr size_t DEFAULT_PLOOKUP_RANGE_BITNUM = 14; + // (DEFAULT_PLOOKUP_RANGE_BITNUM - DEFAULT_PLOOKUP_RANGE_CUTOFF_SIZE) = maximum size of range table that + // `decompose_into_default_range` will create in addition to the DEFAULT_PLOOKUP_RANGE_BITNUM table e.g. 
we don't + // want to create a range table of size (DEFAULT_PLOOKUP_RANGE_BITNUM - 1) if it contains very few entries; each + // table has a O(1 << bitnum) constraint cost to create + static constexpr size_t DEFAULT_PLOOKUP_RANGE_CUTOFF_BITNUM = 4; static constexpr size_t DEFAULT_PLOOKUP_RANGE_STEP_SIZE = 3; static constexpr size_t DEFAULT_PLOOKUP_RANGE_SIZE = (1 << DEFAULT_PLOOKUP_RANGE_BITNUM) - 1; static constexpr size_t DEFAULT_NON_NATIVE_FIELD_LIMB_BITS = 68; @@ -36,7 +41,7 @@ class UltraComposer : public ComposerBase { static constexpr size_t NUMBER_OF_GATES_PER_RAM_ACCESS = 2; static constexpr size_t NUMBER_OF_ARITHMETIC_GATES_PER_RAM_ARRAY = 1; // number of gates created per non-native field operation in process_non_native_field_multiplications - static constexpr size_t GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 7; + static constexpr size_t GATES_PER_PARTIAL_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 4; struct non_native_field_witnesses { // first 4 array elements = limbs // 5th element = prime basis limb @@ -58,30 +63,27 @@ class UltraComposer : public ComposerBase { }; /** - * @brief Used to store instructions to create non_native_field_multiplication gates. + * @brief Used to store instructions to create partial_non_native_field_multiplication gates. * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs * repeatedly. 
*/ - struct cached_non_native_field_multiplication { + struct cached_partial_non_native_field_multiplication { std::array a; std::array b; - std::array q; - std::array r; - non_native_field_multiplication_cross_terms cross_terms; - std::array neg_modulus; + barretenberg::fr lo_0; + barretenberg::fr hi_0; + barretenberg::fr hi_1; - bool operator==(const cached_non_native_field_multiplication& other) const + bool operator==(const cached_partial_non_native_field_multiplication& other) const { bool valid = true; for (size_t i = 0; i < 5; ++i) { valid = valid && (a[i] == other.a[i]); valid = valid && (b[i] == other.b[i]); - valid = valid && (q[i] == other.q[i]); - valid = valid && (r[i] == other.r[i]); } return valid; } - bool operator<(const cached_non_native_field_multiplication& other) const + bool operator<(const cached_partial_non_native_field_multiplication& other) const { if (a < other.a) { return true; @@ -90,22 +92,13 @@ class UltraComposer : public ComposerBase { if (b < other.b) { return true; } - if (b == other.b) { - if (q < other.q) { - return true; - } - if (q == other.q) { - if (r < other.r) { - return true; - } - } - } } return false; } }; - std::vector cached_non_native_field_multiplications; + std::vector cached_partial_non_native_field_multiplications; + void process_non_native_field_multiplications(); enum AUX_SELECTORS { @@ -392,13 +385,23 @@ class UltraComposer : public ComposerBase { rangecount += ram_range_sizes[i]; } } - std::vector nnf_copy(cached_non_native_field_multiplications); + + std::vector pnnf_copy( + cached_partial_non_native_field_multiplications); + for (size_t i = 0; i < pnnf_copy.size(); ++i) { + auto& c = pnnf_copy[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } // update nnfcount - std::sort(nnf_copy.begin(), nnf_copy.end()); + std::sort(pnnf_copy.begin(), pnnf_copy.end()); + auto plast = std::unique(pnnf_copy.begin(), pnnf_copy.end()); - auto last = 
std::unique(nnf_copy.begin(), nnf_copy.end()); - const size_t num_nnf_ops = static_cast(std::distance(nnf_copy.begin(), last)); - nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; + nnfcount = static_cast(std::distance(pnnf_copy.begin(), plast)) * + GATES_PER_PARTIAL_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; + ; } /** @@ -545,9 +548,9 @@ class UltraComposer : public ComposerBase { const size_t hi_limb_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); std::array decompose_non_native_field_double_width_limb( const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); - std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); + std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp index 0ec324aa474..0ac6de40dc5 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp @@ -649,7 +649,7 @@ TYPED_TEST(ultra_composer, non_native_field_multiplication) UltraComposer::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); TestFixture::prove_and_verify(composer, /*expected_result=*/true); 
diff --git a/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp b/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp index f6cf92bd732..e66d3b56b8c 100644 --- a/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp +++ b/cpp/src/barretenberg/plonk/proof_system/verification_key/verification_key.cpp @@ -51,43 +51,34 @@ barretenberg::fr compress_native_evaluation_domain(barretenberg::evaluation_doma */ barretenberg::fr verification_key_data::compress_native(const size_t hash_index) { - barretenberg::evaluation_domain domain = evaluation_domain(circuit_size); - barretenberg::fr compressed_domain = - compress_native_evaluation_domain(domain, proof_system::ComposerType(composer_type)); - - constexpr size_t num_limb_bits = plonk::NUM_LIMB_BITS_IN_FIELD_SIMULATION; - - const auto split_bigfield_limbs = [](const uint256_t& element) { - std::vector limbs; - limbs.push_back(element.slice(0, num_limb_bits)); - limbs.push_back(element.slice(num_limb_bits, num_limb_bits * 2)); - limbs.push_back(element.slice(num_limb_bits * 2, num_limb_bits * 3)); - limbs.push_back(element.slice(num_limb_bits * 3, num_limb_bits * 4)); - return limbs; - }; - - std::vector preimage_data; - preimage_data.emplace_back(composer_type); - preimage_data.emplace_back(compressed_domain); - preimage_data.emplace_back(num_public_inputs); + barretenberg::evaluation_domain eval_domain = evaluation_domain(circuit_size); + + std::vector preimage_data; + + preimage_data.push_back(static_cast(proof_system::ComposerType(composer_type))); + + const uint256_t domain = eval_domain.domain; + const uint256_t generator = eval_domain.generator; + const uint256_t public_inputs = num_public_inputs; + + ASSERT(domain < (uint256_t(1) << 32)); + ASSERT(generator < (uint256_t(1) << 16)); + ASSERT(public_inputs < (uint256_t(1) << 32)); + + write(preimage_data, static_cast(uint256_t(generator))); + write(preimage_data, static_cast(uint256_t(domain))); 
+ write(preimage_data, static_cast(public_inputs)); for (const auto& [tag, selector] : commitments) { - const auto x_limbs = split_bigfield_limbs(selector.x); - const auto y_limbs = split_bigfield_limbs(selector.y); - - preimage_data.push_back(x_limbs[0]); - preimage_data.push_back(x_limbs[1]); - preimage_data.push_back(x_limbs[2]); - preimage_data.push_back(x_limbs[3]); - - preimage_data.push_back(y_limbs[0]); - preimage_data.push_back(y_limbs[1]); - preimage_data.push_back(y_limbs[2]); - preimage_data.push_back(y_limbs[3]); + write(preimage_data, selector.y); + write(preimage_data, selector.x); } + write(preimage_data, eval_domain.root); + barretenberg::fr compressed_key; - if (proof_system::ComposerType(composer_type) == proof_system::ComposerType::PLOOKUP) { - compressed_key = crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index); + if (proof_system::ComposerType(composer_type) == ComposerType::PLOOKUP) { + compressed_key = from_buffer( + crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index)); } else { compressed_key = crypto::pedersen_commitment::compress_native(preimage_data, hash_index); } diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp index 569ad0ca7f2..52812f354c9 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp @@ -1355,7 +1355,7 @@ std::array UltraCircuitConstructor::decompose_non_native_field_doub * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. 
* - * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * Without this queue some functions, such as proof_system::plonk::stdlib::element::multiple_montgomery_ladder, would * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove * duplicates in the circuit finishing stage of the proving key computation. * @@ -1365,7 +1365,7 @@ std::array UltraCircuitConstructor::decompose_non_native_field_doub * * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraCircuitConstructor::queue_non_native_field_multiplication( +std::array UltraCircuitConstructor::evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1393,10 +1393,11 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli get_variable(input.r[2]), get_variable(input.r[3]), }; - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1444,17 +1445,81 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli range_constrain_two_limbs(input.q[0], input.q[1]); range_constrain_two_limbs(input.q[2], input.q[3]); } - // Add witnesses into the multiplication cache - // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) - cached_non_native_field_multiplication 
cache_entry{ - .a = input.a, - .b = input.b, - .q = input.q, - .r = input.r, - .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, - .neg_modulus = input.neg_modulus, - }; - cached_non_native_field_multiplications.emplace_back(cache_entry); + + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT 
+ input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); return std::array{ lo_1_idx, hi_3_idx }; } @@ -1466,46 +1531,29 @@ std::array UltraCircuitConstructor::queue_non_native_field_multipli */ void UltraCircuitConstructor::process_non_native_field_multiplications() { - std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - - auto last = - std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); + for (size_t i = 0; i < cached_partial_non_native_field_multiplications.size(); ++i) { + auto& c = cached_partial_non_native_field_multiplications[i]; + for (size_t j = 0; j < 5; ++j) { + c.a[j] = real_variable_index[c.a[j]]; + c.b[j] = real_variable_index[c.b[j]]; + } + } + std::sort(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - auto it = cached_non_native_field_multiplications.begin(); + auto last = std::unique(cached_partial_non_native_field_multiplications.begin(), + cached_partial_non_native_field_multiplications.end()); - constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT_2 = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + auto it = cached_partial_non_native_field_multiplications.begin(); // iterate over the cached items and create constraints while (it != last) { const auto input = *it; - const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; - const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; - const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; - const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; - const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; - const uint32_t hi_3_idx = 
input.cross_terms.hi_3_idx; - - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.lo_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); ++num_gates; w_l.emplace_back(input.a[0]); @@ -1516,53 +1564,16 @@ void UltraCircuitConstructor::process_non_native_field_multiplications() ++num_gates; w_l.emplace_back(input.a[2]); w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_0); apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); ++num_gates; w_l.emplace_back(input.a[1]); w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); + w_o.emplace_back(zero_idx); + w_4.emplace_back(input.hi_1); apply_aux_selectors(AUX_SELECTORS::NONE); ++num_gates; - - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); - - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); ++it; } } @@ -1574,7 +1585,7 @@ void 
UltraCircuitConstructor::process_non_native_field_multiplications() * limbs with size DEFAULT_NON_NATIVE_FIELD_LIMB_BITS * **/ -std::array UltraCircuitConstructor::evaluate_partial_non_native_field_multiplication( +std::array UltraCircuitConstructor::queue_partial_non_native_field_multiplication( const non_native_field_witnesses& input) { @@ -1602,30 +1613,16 @@ std::array UltraCircuitConstructor::evaluate_partial_non_native_fie const uint32_t hi_0_idx = add_variable(hi_0); const uint32_t hi_1_idx = add_variable(hi_1); - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(zero_idx); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; + // Add witnesses into the multiplication cache + // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_partial_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .lo_0 = lo_0_idx, + .hi_0 = hi_0_idx, + .hi_1 = hi_1_idx, + }; + cached_partial_non_native_field_multiplications.emplace_back(cache_entry); return std::array{ lo_0_idx, hi_1_idx }; } diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp index 84b18b13bcd..21bc9523932 100644 --- 
a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp @@ -167,45 +167,29 @@ class UltraCircuitConstructor : public CircuitConstructorBase ultra_selector_names() - { - std::vector result{ "q_m", "q_c", "q_1", "q_2", "q_3", "q_4", - "q_arith", "q_sort", "q_elliptic", "q_aux", "table_type" }; - return result; - } - struct non_native_field_multiplication_cross_terms { - uint32_t lo_0_idx; - uint32_t lo_1_idx; - uint32_t hi_0_idx; - uint32_t hi_1_idx; - uint32_t hi_2_idx; - uint32_t hi_3_idx; - }; /** - * @brief Used to store instructions to create non_native_field_multiplication gates. + * @brief Used to store instructions to create partial_non_native_field_multiplication gates. * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs * repeatedly. */ - struct cached_non_native_field_multiplication { + struct cached_partial_non_native_field_multiplication { std::array a; std::array b; - std::array q; - std::array r; - non_native_field_multiplication_cross_terms cross_terms; - std::array neg_modulus; + barretenberg::fr lo_0; + barretenberg::fr hi_0; + barretenberg::fr hi_1; - bool operator==(const cached_non_native_field_multiplication& other) const + bool operator==(const cached_partial_non_native_field_multiplication& other) const { bool valid = true; for (size_t i = 0; i < 5; ++i) { valid = valid && (a[i] == other.a[i]); valid = valid && (b[i] == other.b[i]); - valid = valid && (q[i] == other.q[i]); - valid = valid && (r[i] == other.r[i]); } return valid; } - bool operator<(const cached_non_native_field_multiplication& other) const + + bool operator<(const cached_partial_non_native_field_multiplication& other) const { if (a < other.a) { return true; @@ -214,27 +198,33 @@ class UltraCircuitConstructor : public CircuitConstructorBase ultra_selector_names() + { + std::vector result{ "q_m", "q_c", 
"q_1", "q_2", "q_3", "q_4", + "q_arith", "q_sort", "q_elliptic", "q_aux", "table_type" }; + return result; + } + struct non_native_field_multiplication_cross_terms { + uint32_t lo_0_idx; + uint32_t lo_1_idx; + uint32_t hi_0_idx; + uint32_t hi_1_idx; + uint32_t hi_2_idx; + uint32_t hi_3_idx; + }; + /** - * @brief CircuitDataBackup is a structure we use to store all the information about the circuit that is needed to - * restore it back to a pre-finalized state - * @details In check_circuit method in UltraCircuitConstructor we want to check that the whole circuit works, but - * ultra circuits need to have ram, rom and range gates added in the end for the check to be complete as well as the - * set permutation check, so we finalize the circuit when we check it. This structure allows us to restore the - * circuit to the state before the finalization. + * @brief CircuitDataBackup is a structure we use to store all the information about the circuit that is needed + * to restore it back to a pre-finalized state + * @details In check_circuit method in UltraCircuitConstructor we want to check that the whole circuit works, + * but ultra circuits need to have ram, rom and range gates added in the end for the check to be complete as + * well as the set permutation check, so we finalize the circuit when we check it. This structure allows us to + * restore the circuit to the state before the finalization. 
*/ struct CircuitDataBackup { std::vector public_inputs; @@ -272,8 +262,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase memory_write_records; std::map range_lists; - std::vector - cached_non_native_field_multiplications; + std::vector + cached_partial_non_native_field_multiplications; size_t num_gates; bool circuit_finalised = false; @@ -326,14 +316,14 @@ class UltraCircuitConstructor : public CircuitConstructorBaserange_lists; stored_state.circuit_finalised = circuit_constructor->circuit_finalised; stored_state.num_gates = circuit_constructor->num_gates; - stored_state.cached_non_native_field_multiplications = - circuit_constructor->cached_non_native_field_multiplications; + stored_state.cached_partial_non_native_field_multiplications = + circuit_constructor->cached_partial_non_native_field_multiplications; return stored_state; } @@ -398,7 +388,8 @@ class UltraCircuitConstructor : public CircuitConstructorBaserange_lists = range_lists; circuit_constructor->circuit_finalised = circuit_finalised; circuit_constructor->num_gates = num_gates; - circuit_constructor->cached_non_native_field_multiplications = cached_non_native_field_multiplications; + circuit_constructor->cached_partial_non_native_field_multiplications = + cached_partial_non_native_field_multiplications; circuit_constructor->w_l.resize(num_gates); circuit_constructor->w_r.resize(num_gates); circuit_constructor->w_o.resize(num_gates); @@ -511,8 +502,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase ram_arrays; @@ -572,7 +564,7 @@ class UltraCircuitConstructor : public CircuitConstructorBase memory_write_records; - std::vector cached_non_native_field_multiplications; + std::vector cached_partial_non_native_field_multiplications; void process_non_native_field_multiplications(); @@ -638,11 +630,11 @@ class UltraCircuitConstructor : public CircuitConstructorBase size imbalance between sorted and non-sorted sets. 
Checking for this - * and throwing an error would require a refactor of the Composer to catelog all 'orphan' variables not - * assigned to gates. + * this range constraint will increase the size of the 'sorted set' of range-constrained integers + *by 1. The 'non-sorted set' of range-constrained integers is a subset of the wire indices of all + *arithmetic gates. No arithemtic gate => size imbalance between sorted and non-sorted sets. Checking + *for this and throwing an error would require a refactor of the Composer to catelog all 'orphan' + *variables not assigned to gates. **/ create_new_range_constraint(variable_index, 1ULL << num_bits, msg); } else { @@ -704,8 +696,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase decompose_non_native_field_double_width_limb( const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array queue_non_native_field_multiplication( + std::array evaluate_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); - std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); + std::array queue_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; typedef std::tuple add_simple; std::array evaluate_non_native_field_subtraction( diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp index aea6e78bde7..1b9706e7569 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.test.cpp @@ -643,7 +643,7 @@ TEST(ultra_circuit_constructor, non_native_field_multiplication) proof_system::UltraCircuitConstructor::non_native_field_witnesses inputs{ 
a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = circuit_constructor.queue_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = circuit_constructor.evaluate_non_native_field_multiplication(inputs); circuit_constructor.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); auto saved_state = UltraCircuitConstructor::CircuitDataBackup::store_full_state(circuit_constructor); diff --git a/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp b/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp index a6dcc0b87c3..7274600b849 100644 --- a/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp +++ b/cpp/src/barretenberg/solidity_helpers/circuits/recursive_circuit.hpp @@ -125,7 +125,7 @@ template class RecursiveCircuit { throw_or_abort("inner proof result != 1"); } - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); if (outer_composer.failed()) { throw_or_abort("outer composer failed"); diff --git a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp index 6dbc70e5d17..c5ba72f8d35 100644 --- a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp +++ b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.cpp @@ -14,19 +14,34 @@ using namespace plookup; using namespace barretenberg; template -point pedersen_plookup_commitment::compress_to_point(const field_t& left, const field_t& right) +point pedersen_plookup_commitment::compress_to_point(const field_t& left, + const field_t& right, + const bool skip_rhs_range_check) { auto p2 = pedersen_plookup_hash::hash_single(left, false); - auto p1 = pedersen_plookup_hash::hash_single(right, true); + auto p1 = pedersen_plookup_hash::hash_single(right, true, skip_rhs_range_check); return 
pedersen_plookup_hash::add_points(p1, p2); } -template field_t pedersen_plookup_commitment::compress(const field_t& left, const field_t& right) +template +field_t pedersen_plookup_commitment::compress(const field_t& left, + const field_t& right, + const bool skip_rhs_range_check) { - return compress_to_point(left, right).x; + return compress_to_point(left, right, skip_rhs_range_check).x; } +/** + * @brief Compress a vector of field elements into a grumpkin point. + * This serves as the basis for a collision-resistant hash function. + * Note that this does NOT produce a hash that can be modelled as a random oracle. + * + * @tparam C + * @param inputs + * @param iv initialization vector + * @return point + */ template point pedersen_plookup_commitment::merkle_damgard_compress(const std::vector& inputs, const field_t& iv) { @@ -34,13 +49,19 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect return point{ 0, 0 }; } + // The first two inputs to the Merkle-Damgard construction are the initialization vector and the number of elements + // being hashed. Including the length ensures that hashes of different lengths cannot collide. Starting the hash + // with these 2 inputs is optimal in the case that the IV is constant. i.e. the 1st 3 calls to `hash_single` are + // over constants and cost no constraints. 
r = H(iv, num_inputs) is constant and the 1st half of H(r, inputs[0]) is + // also constant auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, iv)[ColumnIdx::C2][0]; auto num_inputs = inputs.size(); - for (size_t i = 0; i < num_inputs; i++) { + result = compress(result, field_t(num_inputs)); + for (size_t i = 0; i < num_inputs - 1; i++) { result = compress(result, inputs[i]); } - return compress_to_point(result, field_t(num_inputs)); + return compress_to_point(result, inputs[num_inputs - 1]); } template @@ -53,7 +74,9 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect } auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, 0)[ColumnIdx::C2][0]; - for (size_t i = 0; i < 2 * num_inputs; i++) { + result = compress(result, field_t(num_inputs)); + + for (size_t i = 0; i < 2 * num_inputs - 1; i++) { if ((i & 1) == 0) { auto iv_result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, ivs[i >> 1])[ColumnIdx::C2][0]; @@ -63,7 +86,25 @@ point pedersen_plookup_commitment::merkle_damgard_compress(const std::vect } } - return compress_to_point(result, field_t(num_inputs)); + return compress_to_point(result, inputs[num_inputs - 1]); +} + +template +point pedersen_plookup_commitment::merkle_damgard_compress_with_relaxed_range_constraints( + const std::vector& inputs, const field_t& iv) +{ + if (inputs.size() == 0) { + return point{ 0, 0 }; + } + + auto result = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_IV, iv)[ColumnIdx::C2][0]; + auto num_inputs = inputs.size(); + result = compress(result, field_t(num_inputs)); + for (size_t i = 0; i < num_inputs - 1; i++) { + result = compress(result, inputs[i], true); + } + + return compress_to_point(result, inputs[num_inputs - 1], true); } template @@ -102,6 +143,13 @@ point pedersen_plookup_commitment::commit(const std::vector& inpu return merkle_damgard_compress(inputs, field_t(hash_index)); } +template +point 
pedersen_plookup_commitment::commit_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index) +{ + return merkle_damgard_compress_with_relaxed_range_constraints(inputs, field_t(hash_index)); +} + template point pedersen_plookup_commitment::commit(const std::vector& inputs, const std::vector& hash_indices) @@ -114,6 +162,24 @@ point pedersen_plookup_commitment::commit(const std::vector& inpu return merkle_damgard_compress(inputs, hash_indices_); } +/** + * @brief Calls `compress` but instructs the Pedersen hash method `hash_single` + * to not apply range constraints on the input elements. + * + * Use this method when the input elements are known to be <= 2^252 + * + * @tparam C + * @param inputs + * @param hash_index + * @return field_t + */ +template +field_t pedersen_plookup_commitment::compress_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index) +{ + return commit_with_relaxed_range_constraints(inputs, hash_index).x; +} + template field_t pedersen_plookup_commitment::compress(const std::vector& inputs, const size_t hash_index) { diff --git a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp index 42cc8252d23..90076a3035f 100644 --- a/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp +++ b/cpp/src/barretenberg/stdlib/commitment/pedersen/pedersen_plookup.hpp @@ -17,14 +17,18 @@ template class pedersen_plookup_commitment { public: static point commit(const std::vector& inputs, const size_t hash_index = 0); static point commit(const std::vector& inputs, const std::vector& hash_indices); + static point commit_with_relaxed_range_constraints(const std::vector& inputs, const size_t hash_index = 0); - static field_t compress(const field_t& left, const field_t& right); + static field_t compress(const field_t& left, const field_t& right, const bool skip_rhs_range_check = false); static field_t compress(const 
std::vector& inputs, const size_t hash_index = 0); static field_t compress(const packed_byte_array& input) { return compress(input.get_limbs()); } static field_t compress(const std::vector& inputs, const std::vector& hash_indices); static field_t compress(const std::vector>& input_pairs); + static field_t compress_with_relaxed_range_constraints(const std::vector& inputs, + const size_t hash_index = 0); + template static field_t compress(const std::array& inputs) { std::vector in(inputs.begin(), inputs.end()); @@ -33,9 +37,12 @@ template class pedersen_plookup_commitment { static point merkle_damgard_compress(const std::vector& inputs, const field_t& iv); static point merkle_damgard_compress(const std::vector& inputs, const std::vector& ivs); + static point merkle_damgard_compress_with_relaxed_range_constraints(const std::vector& inputs, + const field_t& iv); + static point merkle_damgard_tree_compress(const std::vector& inputs, const std::vector& ivs); - static point compress_to_point(const field_t& left, const field_t& right); + static point compress_to_point(const field_t& left, const field_t& right, const bool skip_rhs_range_check = false); }; extern template class pedersen_plookup_commitment; diff --git a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp index 2801648f540..9b5e84cda0c 100644 --- a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp +++ b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.cpp @@ -79,7 +79,8 @@ point pedersen_plookup_hash::add_points(const point& p1, const point& p2, /** * Hash a single field element using lookup tables. 
*/ -template point pedersen_plookup_hash::hash_single(const field_t& scalar, const bool parity) +template +point pedersen_plookup_hash::hash_single(const field_t& scalar, const bool parity, const bool skip_range_check) { if (scalar.is_constant()) { C* ctx = scalar.get_context(); @@ -93,6 +94,10 @@ template point pedersen_plookup_hash::hash_single(const field const field_t y_lo = witness_t(ctx, uint256_t(scalar.get_value()).slice(0, 126)); ReadData lookup_hi, lookup_lo; + + // If `skip_range_check = true`, this implies the input scalar is 252 bits maximum. + // i.e. we do not require a check that scalar slice sums < p . + // We can also likely use a multitable with 1 less lookup if (parity) { lookup_lo = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_RIGHT_LO, y_lo); lookup_hi = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_RIGHT_HI, y_hi); @@ -101,17 +106,35 @@ template point pedersen_plookup_hash::hash_single(const field lookup_hi = plookup_read::get_lookup_accumulators(MultiTableId::PEDERSEN_LEFT_HI, y_hi); } - // Check if (r_hi - y_hi) is 128 bits and if (r_hi - y_hi) == 0, then - // (r_lo - y_lo) must be 126 bits. 
- constexpr uint256_t modulus = fr::modulus; - const field_t r_lo = witness_t(ctx, modulus.slice(0, 126)); - const field_t r_hi = witness_t(ctx, modulus.slice(126, 256)); + // validate slices equal scalar + // TODO(suyash?): can remove this gate if we use a single lookup accumulator for HI + LO combined + // can recover y_hi, y_lo from Column 1 of the lookup accumulator output + scalar.add_two(-y_hi * (uint256_t(1) << 126), -y_lo).assert_equal(0); + + // if skip_range_check = true we assume input max size is 252 bits => final lookup scalar slice value must be 0 + if (skip_range_check) { + lookup_hi[ColumnIdx::C1][lookup_hi[ColumnIdx::C1].size() - 1].assert_equal(0); + } + if (!skip_range_check) { + // Check that y_hi * 2^126 + y_lo < fr::modulus when evaluated over the integers + constexpr uint256_t modulus = fr::modulus; + const field_t r_lo = field_t(ctx, modulus.slice(0, 126)); + const field_t r_hi = field_t(ctx, modulus.slice(126, 256)); - const field_t term_hi = r_hi - y_hi; - const field_t term_lo = (r_lo - y_lo) * field_t(term_hi == field_t(0)); - term_hi.normalize().create_range_constraint(128); - term_lo.normalize().create_range_constraint(126); + bool need_borrow = (uint256_t(y_lo.get_value()) > uint256_t(r_lo.get_value())); + field_t borrow = field_t::from_witness(ctx, need_borrow); + // directly call `create_new_range_constraint` to avoid creating an arithmetic gate + scalar.get_context()->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_t hi = (r_hi - y_hi) - borrow; + field_t lo = (r_lo - y_lo) + (borrow * (uint256_t(1) << 126)); + + hi.create_range_constraint(128); + lo.create_range_constraint(126); + } const size_t num_lookups_lo = lookup_lo[ColumnIdx::C1].size(); const size_t num_lookups_hi = lookup_hi[ColumnIdx::C1].size(); diff --git a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp
b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp index 5d099c5c3fa..2467e3fdb57 100644 --- a/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp +++ b/cpp/src/barretenberg/stdlib/hash/pedersen/pedersen_plookup.hpp @@ -23,7 +23,7 @@ template class pedersen_plookup_hash { public: static point add_points(const point& p1, const point& p2, const AddType add_type = ONE); - static point hash_single(const field_t& in, const bool parity); + static point hash_single(const field_t& in, const bool parity, const bool skip_range_check = false); static field_t hash_multiple(const std::vector& in, const size_t hash_index = 0); }; diff --git a/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp b/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp index d324bcc4aff..96d4c022df5 100644 --- a/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/bigfield/bigfield_impl.hpp @@ -593,18 +593,19 @@ template bigfield bigfield::operator-(const result.binary_basis_limbs[3].element = binary_basis_limbs[3].element + barretenberg::fr(to_add_3); if constexpr (C::type == ComposerType::PLOOKUP) { - if (result.prime_basis_limb.multiplicative_constant == 1 && - other.prime_basis_limb.multiplicative_constant == 1 && !result.is_constant() && !other.is_constant()) { + if (prime_basis_limb.multiplicative_constant == 1 && other.prime_basis_limb.multiplicative_constant == 1 && + !is_constant() && !other.is_constant()) { bool limbconst = result.binary_basis_limbs[0].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[1].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[2].element.is_constant(); limbconst = limbconst || result.binary_basis_limbs[3].element.is_constant(); - limbconst = limbconst || result.prime_basis_limb.is_constant(); + limbconst = limbconst || prime_basis_limb.is_constant(); limbconst = limbconst || 
other.binary_basis_limbs[0].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[1].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[2].element.is_constant(); limbconst = limbconst || other.binary_basis_limbs[3].element.is_constant(); limbconst = limbconst || other.prime_basis_limb.is_constant(); + limbconst = limbconst || (prime_basis_limb.witness_index == other.prime_basis_limb.witness_index); if (!limbconst) { std::pair x0{ result.binary_basis_limbs[0].element.witness_index, binary_basis_limbs[0].element.multiplicative_constant }; @@ -631,10 +632,11 @@ template bigfield bigfield::operator-(const barretenberg::fr c3(result.binary_basis_limbs[3].element.additive_constant - other.binary_basis_limbs[3].element.additive_constant); - uint32_t xp(result.prime_basis_limb.witness_index); + uint32_t xp(prime_basis_limb.witness_index); uint32_t yp(other.prime_basis_limb.witness_index); - barretenberg::fr cp(result.prime_basis_limb.additive_constant - - other.prime_basis_limb.additive_constant); + barretenberg::fr cp(prime_basis_limb.additive_constant - other.prime_basis_limb.additive_constant); + uint512_t constant_to_add_mod_p = (constant_to_add) % prime_basis.modulus; + cp += barretenberg::fr(constant_to_add_mod_p.lo); const auto output_witnesses = ctx->evaluate_non_native_field_subtraction( { x0, y0, c0 }, { x1, y1, c1 }, { x2, y2, c2 }, { x3, y3, c3 }, { xp, yp, cp }); @@ -1982,7 +1984,7 @@ void bigfield::unsafe_evaluate_multiply_add(const bigfield& input_left, modulus, }; // N.B. 
this method also evaluates the prime field component of the non-native field mul - const auto [lo_idx, hi_idx] = ctx->queue_non_native_field_multiplication(witnesses, false); + const auto [lo_idx, hi_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); field_t::evaluate_polynomial_identity(left.prime_basis_limb, @@ -2267,7 +2269,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vector> limb_0_accumulator; std::vector> limb_2_accumulator; std::vector> prime_limb_accumulator; @@ -2320,7 +2322,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorevaluate_partial_non_native_field_multiplication(mul_witnesses); + const auto [lo_2_idx, hi_2_idx] = ctx->queue_partial_non_native_field_multiplication(mul_witnesses); field_t lo_2 = field_t::from_witness_index(ctx, lo_2_idx); field_t hi_2 = field_t::from_witness_index(ctx, hi_2_idx); @@ -2416,7 +2418,7 @@ void bigfield::unsafe_evaluate_multiple_multiply_add(const std::vectorqueue_non_native_field_multiplication(witnesses, false); + const auto [lo_1_idx, hi_1_idx] = ctx->evaluate_non_native_field_multiplication(witnesses, false); barretenberg::fr neg_prime = -barretenberg::fr(uint256_t(target_basis.modulus)); diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp index 29d8e472c14..4330c5b6b45 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.hpp @@ -50,15 +50,15 @@ template class element { void validate_on_curve() const { - Fq xx = x.sqr(); - Fq rhs = y.sqr(); Fq b(get_context(), uint256_t(NativeGroup::curve_b)); - Fq lhs = xx.madd(x, { b }); - if constexpr (NativeGroup::has_a) { + if constexpr (!NativeGroup::has_a) { + // we validate y^2 = x^3 + b by setting "fix_remainder_zero = true" when calling mult_madd + 
Fq::mult_madd({ x.sqr(), y }, { x, -y }, { b }, true); + } else { Fq a(get_context(), uint256_t(NativeGroup::curve_a)); - lhs = lhs + (a * x); + // we validate y^2 = x^3 + ax + b by setting "fix_remainder_zero = true" when calling mult_madd + Fq::mult_madd({ x.sqr(), x, y }, { -x, a, y }, { b }, true); } - lhs.assert_equal(rhs); } static element one(Composer* ctx) @@ -99,6 +99,7 @@ template class element { *this = *this - other; return *this; } + std::array add_sub(const element& other) const; element operator*(const Fr& other) const; @@ -139,7 +140,7 @@ template class element { bool is_element = false; chain_add_accumulator(){}; - explicit chain_add_accumulator(element& input) + explicit chain_add_accumulator(const element& input) { x3_prev = input.x; y3_prev = input.y; @@ -161,10 +162,8 @@ template class element { element montgomery_ladder(const element& other) const; element montgomery_ladder(const chain_add_accumulator& accumulator); - element double_montgomery_ladder(const element& add1, const element& add2) const; - element double_montgomery_ladder(const chain_add_accumulator& add1, const element& add2) const; - element double_montgomery_ladder(const chain_add_accumulator& add1, const chain_add_accumulator& add2) const; - element double_into_montgomery_ladder(const element& to_add) const; + element multiple_montgomery_ladder(const std::vector& to_add) const; + element quadruple_and_add(const std::vector& to_add) const; typename NativeGroup::affine_element get_value() const { @@ -256,12 +255,13 @@ template class element { template ::value>> static std::array, 5> create_group_element_rom_tables( - const std::array& elements); + const std::array& elements, std::array& limb_max); template ::value>> static element read_group_element_rom_tables(const std::array, 5>& tables, - const field_t& index); + const field_t& index, + const std::array& limb_max); static std::pair compute_offset_generators(const size_t num_rounds); @@ -277,6 +277,7 @@ template class element { 
element operator[](const size_t idx) const { return element_table[idx]; } std::array element_table; std::array, 5> coordinates; + std::array limb_max; // tracks the maximum limb size represented in each element_table entry }; template ::value>> @@ -310,7 +311,6 @@ template class element { P1.element_table[i] = P1.element_table[i - 1] + d2; } for (size_t i = 0; i < 8; ++i) { - // TODO: DO WE NEED TO REDUCE THESE ELEMENTS???? P1.element_table[i] = (-P1.element_table[15 - i]); } for (size_t i = 0; i < 16; ++i) { @@ -322,8 +322,8 @@ template class element { endoP1.element_table[i].x = P1.element_table[i].x * beta; endoP1.element_table[15 - i].x = endoP1.element_table[i].x; } - P1.coordinates = create_group_element_rom_tables<16>(P1.element_table); - endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table); + P1.coordinates = create_group_element_rom_tables<16>(P1.element_table, P1.limb_max); + endoP1.coordinates = create_group_element_rom_tables<16>(endoP1.element_table, endoP1.limb_max); auto result = std::make_pair, four_bit_table_plookup<>>( (four_bit_table_plookup<>)P1, (four_bit_table_plookup<>)endoP1); return result; @@ -391,6 +391,7 @@ template class element { std::array element_table; std::array, 5> coordinates; + std::array limb_max; }; using twin_lookup_table = typename std:: @@ -418,10 +419,10 @@ template class element { endo_table.element_table[i + 8].x = base_table[7 - i].x * beta; endo_table.element_table[i + 8].y = base_table[7 - i].y; - endo_table.element_table[7 - i] = (-endo_table.element_table[i + 8]).reduce(); + endo_table.element_table[7 - i] = (-endo_table.element_table[i + 8]); } - endo_table.coordinates = create_group_element_rom_tables<16>(endo_table.element_table); + endo_table.coordinates = create_group_element_rom_tables<16>(endo_table.element_table, endo_table.limb_max); } else { std::array endo_inputs(inputs); for (auto& input : endo_inputs) { @@ -451,10 +452,10 @@ template class element { endo_table.element_table[i + 
16].x = base_table[15 - i].x * beta; endo_table.element_table[i + 16].y = base_table[15 - i].y; - endo_table.element_table[15 - i] = (-endo_table.element_table[i + 16]).reduce(); + endo_table.element_table[15 - i] = (-endo_table.element_table[i + 16]); } - endo_table.coordinates = create_group_element_rom_tables<32>(endo_table.element_table); + endo_table.coordinates = create_group_element_rom_tables<32>(endo_table.element_table, endo_table.limb_max); } return std::make_pair, lookup_table_plookup<5>>((lookup_table_plookup<5>)base_table, (lookup_table_plookup<5>)endo_table); @@ -472,11 +473,16 @@ template class element { num_points = points.size(); num_fives = num_points / 5; + // size-6 table is expensive and only benefits us if creating them reduces the number of total tables if (num_fives * 5 == (num_points - 1)) { num_fives -= 1; num_sixes = 1; - } else { - num_sixes = 0; + } else if (num_fives * 5 == (num_points - 2) && num_fives >= 2) { + num_fives -= 2; + num_sixes = 2; + } else if (num_fives * 5 == (num_points - 3) && num_fives >= 3) { + num_fives -= 3; + num_sixes = 3; } has_quad = ((num_fives * 5 + num_sixes * 6) < num_points - 3) && (num_points >= 4); @@ -490,33 +496,40 @@ template class element { has_singleton = num_points != ((num_fives * 5 + num_sixes * 6) + ((size_t)has_quad * 4) + ((size_t)has_triple * 3) + ((size_t)has_twin * 2)); + size_t offset = 0; + for (size_t i = 0; i < num_sixes; ++i) { + six_tables.push_back(lookup_table_plookup<6>({ + points[offset + 6 * i], + points[offset + 6 * i + 1], + points[offset + 6 * i + 2], + points[offset + 6 * i + 3], + points[offset + 6 * i + 4], + points[offset + 6 * i + 5], + })); + } + offset += 6 * num_sixes; for (size_t i = 0; i < num_fives; ++i) { - five_tables.push_back(lookup_table_plookup<5>( - { points[5 * i], points[5 * i + 1], points[5 * i + 2], points[5 * i + 3], points[5 * i + 4] })); - } - - if (num_sixes == 1) { - six_tables.push_back(lookup_table_plookup<6>({ points[5 * num_fives], - points[5 * 
num_fives + 1], - points[5 * num_fives + 2], - points[5 * num_fives + 3], - points[5 * num_fives + 4], - points[5 * num_fives + 5] })); + five_tables.push_back(lookup_table_plookup<5>({ + points[offset + 5 * i], + points[offset + 5 * i + 1], + points[offset + 5 * i + 2], + points[offset + 5 * i + 3], + points[offset + 5 * i + 4], + })); } + offset += 5 * num_fives; if (has_quad) { - quad_tables.push_back(quad_lookup_table({ points[5 * num_fives], - points[5 * num_fives + 1], - points[5 * num_fives + 2], - points[5 * num_fives + 3] })); + quad_tables.push_back( + quad_lookup_table({ points[offset], points[offset + 1], points[offset + 2], points[offset + 3] })); } if (has_triple) { - triple_tables.push_back(triple_lookup_table( - { points[5 * num_fives], points[5 * num_fives + 1], points[5 * num_fives + 2] })); + triple_tables.push_back( + triple_lookup_table({ points[offset], points[offset + 1], points[offset + 2] })); } if (has_twin) { - twin_tables.push_back(twin_lookup_table({ points[5 * num_fives], points[5 * num_fives + 1] })); + twin_tables.push_back(twin_lookup_table({ points[offset], points[offset + 1] })); } if (has_singleton) { @@ -587,37 +600,36 @@ template class element { element::chain_add_accumulator get_chain_add_accumulator(std::vector>& naf_entries) const { std::vector round_accumulator; + for (size_t j = 0; j < num_sixes; ++j) { + round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j], + naf_entries[6 * j + 1], + naf_entries[6 * j + 2], + naf_entries[6 * j + 3], + naf_entries[6 * j + 4], + naf_entries[6 * j + 5] })); + } + size_t offset = num_sixes * 6; for (size_t j = 0; j < num_fives; ++j) { - round_accumulator.push_back(five_tables[j].get({ naf_entries[5 * j], - naf_entries[5 * j + 1], - naf_entries[5 * j + 2], - naf_entries[5 * j + 3], - naf_entries[5 * j + 4] })); - } - - if (num_sixes == 1) { - round_accumulator.push_back(six_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 
+ 2], - naf_entries[num_fives * 5 + 3], - naf_entries[num_fives * 5 + 4], - naf_entries[num_fives * 5 + 5] })); + round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + j * 5], + naf_entries[offset + j * 5 + 1], + naf_entries[offset + j * 5 + 2], + naf_entries[offset + j * 5 + 3], + naf_entries[offset + j * 5 + 4] })); } - + offset += num_fives * 5; if (has_quad) { - round_accumulator.push_back(quad_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3] })); + round_accumulator.push_back(quad_tables[0].get({ naf_entries[offset], + naf_entries[offset + 1], + naf_entries[offset + 2], + naf_entries[offset + 3] })); } if (has_triple) { - round_accumulator.push_back(triple_tables[0].get( - { naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1], naf_entries[num_fives * 5 + 2] })); + round_accumulator.push_back( + triple_tables[0].get({ naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2] })); } if (has_twin) { - round_accumulator.push_back( - twin_tables[0].get({ naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1] })); + round_accumulator.push_back(twin_tables[0].get({ naf_entries[offset], naf_entries[offset + 1] })); } if (has_singleton) { round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); @@ -640,37 +652,37 @@ template class element { element get(std::vector>& naf_entries) const { std::vector round_accumulator; - for (size_t j = 0; j < num_fives; ++j) { - round_accumulator.push_back(five_tables[j].get({ naf_entries[5 * j], - naf_entries[5 * j + 1], - naf_entries[5 * j + 2], - naf_entries[5 * j + 3], - naf_entries[5 * j + 4] })); + for (size_t j = 0; j < num_sixes; ++j) { + round_accumulator.push_back(six_tables[j].get({ naf_entries[6 * j], + naf_entries[6 * j + 1], + naf_entries[6 * j + 2], + naf_entries[6 * j + 3], + naf_entries[6 * j + 4], + naf_entries[6 * j + 5] })); } + size_t offset = 
num_sixes * 6; - if (num_sixes == 1) { - round_accumulator.push_back(six_tables[0].get({ naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3], - naf_entries[num_fives * 5 + 4], - naf_entries[num_fives * 5 + 5] })); + for (size_t j = 0; j < num_fives; ++j) { + round_accumulator.push_back(five_tables[j].get({ naf_entries[offset + 5 * j], + naf_entries[offset + 5 * j + 1], + naf_entries[offset + 5 * j + 2], + naf_entries[offset + 5 * j + 3], + naf_entries[offset + 5 * j + 4] })); } + offset += num_fives * 5; + if (has_quad) { - round_accumulator.push_back(quad_tables[0].get(naf_entries[num_fives * 5], - naf_entries[num_fives * 5 + 1], - naf_entries[num_fives * 5 + 2], - naf_entries[num_fives * 5 + 3])); + round_accumulator.push_back(quad_tables[0].get( + naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2], naf_entries[offset + 3])); } if (has_triple) { - round_accumulator.push_back(triple_tables[0].get( - naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1], naf_entries[num_fives * 5 + 2])); + round_accumulator.push_back( + triple_tables[0].get(naf_entries[offset], naf_entries[offset + 1], naf_entries[offset + 2])); } if (has_twin) { - round_accumulator.push_back( - twin_tables[0].get(naf_entries[num_fives * 5], naf_entries[num_fives * 5 + 1])); + round_accumulator.push_back(twin_tables[0].get(naf_entries[offset], naf_entries[offset + 1])); } if (has_singleton) { round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); @@ -862,67 +874,6 @@ template class element { return element::chain_add_end(accumulator); } - // chain_add_accumulator get_chain_initial_entry() const - // { - // std::vector add_accumulator; - // for (size_t i = 0; i < num_quads; ++i) { - // add_accumulator.push_back(quad_tables[i][0]); - // } - // if (has_twin) { - // add_accumulator.push_back(twin_tables[0][0]); - // } - // if (has_triple) { - // 
add_accumulator.push_back(triple_tables[0][0]); - // } - // if (has_singleton) { - // add_accumulator.push_back(singletons[0]); - // } - // if (add_accumulator.size() >= 2) { - // chain_add_accumulator output = element::chain_add_start(add_accumulator[0], add_accumulator[1]); - // for (size_t i = 2; i < add_accumulator.size(); ++i) { - // output = element::chain_add(add_accumulator[i], output); - // } - // return output; - // } - // return chain_add_accumulator(add_accumulator[0]); - // } - - // element::chain_add_accumulator get_chain_add_accumulator(std::vector>& naf_entries) const - // { - // std::vector round_accumulator; - // for (size_t j = 0; j < num_quads; ++j) { - // round_accumulator.push_back(quad_tables[j].get( - // naf_entries[4 * j], naf_entries[4 * j + 1], naf_entries[4 * j + 2], naf_entries[4 * j + 3])); - // } - - // if (has_triple) { - // round_accumulator.push_back(triple_tables[0].get( - // naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1], naf_entries[num_quads * 4 + 2])); - // } - // if (has_twin) { - // round_accumulator.push_back( - // twin_tables[0].get(naf_entries[num_quads * 4], naf_entries[num_quads * 4 + 1])); - // } - // if (has_singleton) { - // round_accumulator.push_back(singletons[0].conditional_negate(naf_entries[num_points - 1])); - // } - - // element::chain_add_accumulator accumulator; - // if (round_accumulator.size() == 1) { - // accumulator.x3_prev = round_accumulator[0].x; - // accumulator.y3_prev = round_accumulator[0].y; - // accumulator.is_element = true; - // return accumulator; - // } else if (round_accumulator.size() == 2) { - // return element::chain_add_start(round_accumulator[0], round_accumulator[1]); - // } else { - // accumulator = element::chain_add_start(round_accumulator[0], round_accumulator[1]); - // for (size_t j = 2; j < round_accumulator.size(); ++j) { - // accumulator = element::chain_add(round_accumulator[j], accumulator); - // } - // } - // return (accumulator); - // } std::vector 
quad_tables; std::vector triple_tables; std::vector twin_tables; diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp index e3b6576e9fc..85b4e2325ca 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup.test.cpp @@ -415,7 +415,7 @@ template class stdlib_biggroup : public testing::Test { EXPECT_VERIFICATION(composer); } - static void test_double_montgomery_ladder() + static void test_multiple_montgomery_ladder() { Composer composer = Composer(); size_t num_repetitions = 10; @@ -423,19 +423,17 @@ template class stdlib_biggroup : public testing::Test { affine_element acc_small(element::random_element()); element_ct acc_big = element_ct::from_witness(&composer, acc_small); - affine_element add_1_small_0(element::random_element()); - element_ct add_1_big_0 = element_ct::from_witness(&composer, add_1_small_0); - affine_element add_2_small_0(element::random_element()); - element_ct add_2_big_0 = element_ct::from_witness(&composer, add_2_small_0); - - affine_element add_1_small_1(element::random_element()); - element_ct add_1_big_1 = element_ct::from_witness(&composer, add_1_small_1); - affine_element add_2_small_1(element::random_element()); - element_ct add_2_big_1 = element_ct::from_witness(&composer, add_2_small_1); - - typename element_ct::chain_add_accumulator add_1 = element_ct::chain_add_start(add_1_big_0, add_1_big_1); - typename element_ct::chain_add_accumulator add_2 = element_ct::chain_add_start(add_2_big_0, add_2_big_1); - acc_big.double_montgomery_ladder(add_1, add_2); + std::vector to_add; + for (size_t j = 0; j < i; ++j) { + affine_element add_1_small_0(element::random_element()); + element_ct add_1_big_0 = element_ct::from_witness(&composer, add_1_small_0); + affine_element add_2_small_0(element::random_element()); + element_ct add_2_big_0 = element_ct::from_witness(&composer, 
add_2_small_0); + typename element_ct::chain_add_accumulator add_1 = + element_ct::chain_add_start(add_1_big_0, add_2_big_0); + to_add.emplace_back(add_1); + } + acc_big.multiple_montgomery_ladder(to_add); } EXPECT_VERIFICATION(composer); @@ -890,10 +888,10 @@ HEAVY_TYPED_TEST(stdlib_biggroup, chain_add) TestFixture::test_chain_add(); } -HEAVY_TYPED_TEST(stdlib_biggroup, double_montgomery_ladder) +HEAVY_TYPED_TEST(stdlib_biggroup, multiple_montgomery_ladder) { - TestFixture::test_double_montgomery_ladder(); + TestFixture::test_multiple_montgomery_ladder(); } HEAVY_TYPED_TEST(stdlib_biggroup, compute_naf) diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp index c237d0dfbb7..ebeb0aee5b1 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_batch_mul.hpp @@ -41,14 +41,11 @@ element element::wnaf_batch_mul(const std::vector to_add; + for (size_t j = 0; j < points.size(); ++j) { + to_add.emplace_back(point_tables[j][wnaf_entries[j][i]]); } - // accumulator = accumulator.dbl(); - // accumulator = accumulator.montgomery_ladder(to_add); - accumulator = accumulator.double_into_montgomery_ladder(to_add); + accumulator = accumulator.quadruple_and_add(to_add); } for (size_t i = 0; i < points.size(); ++i) { diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp index 834a1b01ef1..924cd0d7ecd 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_bn254.hpp @@ -117,34 +117,45 @@ element element::bn254_endo_batch_mul_with_generator return to_add; }; - for (size_t i = 1; i < num_rounds / 2; ++i) { + // Perform multiple rounds of the montgomery ladder algorithm per "iteration" of our main loop.
+ // This is in order to reduce the number of field reductions required when calling `multiple_montgomery_ladder` + constexpr size_t num_rounds_per_iteration = 4; - auto add_1 = get_point_to_add(i * 2 - 1); - auto add_2 = get_point_to_add(i * 2); + // we require that we perform max of one generator per iteration + static_assert(num_rounds_per_iteration < 8); - // TODO update this to work if num_bits is odd - if ((i * 2) % 8 == 0) { - add_1 = element::chain_add(generator_table[generator_wnaf[(i * 2 - 8) / 8]], add_1); - add_1 = element::chain_add(generator_endo_table[generator_endo_wnaf[(i * 2 - 8) / 8]], add_1); - } - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); + size_t num_iterations = num_rounds / num_rounds_per_iteration; + num_iterations += ((num_iterations * num_rounds_per_iteration) == num_rounds) ? 0 : 1; + const size_t num_rounds_per_final_iteration = + (num_rounds - 1) - ((num_iterations - 1) * num_rounds_per_iteration); + + size_t generator_idx = 0; + for (size_t i = 0; i < num_iterations; ++i) { + + const size_t inner_num_rounds = + (i != num_iterations - 1) ? 
num_rounds_per_iteration : num_rounds_per_final_iteration; + std::vector to_add; + + for (size_t j = 0; j < inner_num_rounds; ++j) { + to_add.emplace_back(get_point_to_add(i * num_rounds_per_iteration + j + 1)); } - } - if ((num_rounds & 0x01ULL) == 0x00ULL) { - auto add_1 = get_point_to_add(num_rounds - 1); - add_1 = element::chain_add(generator_table[generator_wnaf[generator_wnaf.size() - 2]], add_1); - add_1 = element::chain_add(generator_endo_table[generator_endo_wnaf[generator_wnaf.size() - 2]], add_1); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); + bool add_generator_this_round = false; + size_t add_idx = 0; + for (size_t j = 0; j < inner_num_rounds; ++j) { + add_generator_this_round = ((i * num_rounds_per_iteration + j) % 8) == 6; + if (add_generator_this_round) { + add_idx = j; + break; + } + } + if (add_generator_this_round) { + to_add[add_idx] = element::chain_add(generator_table[generator_wnaf[generator_idx]], to_add[add_idx]); + to_add[add_idx] = + element::chain_add(generator_endo_table[generator_endo_wnaf[generator_idx]], to_add[add_idx]); + generator_idx++; } + accumulator = accumulator.multiple_montgomery_ladder(to_add); } for (size_t i = 0; i < small_points.size(); ++i) { @@ -333,12 +344,12 @@ element element::bn254_endo_batch_mul(const std::vec * 1. Extract NAF value for bit `2*i - 1` for each scalar multiplier and store in `nafs` vector. * 2. Use `nafs` vector to derive the point that we need (`add_1`) to add into our accumulator. * 3. Repeat the above 2 steps but for bit `2 * i` (`add_2`) - * 4. Compute `accumulator = 4 * accumulator + 2 * add_1 + add_2` using `double_montgomery_ladder` method + * 4. 
Compute `accumulator = 4 * accumulator + 2 * add_1 + add_2` using `multiple_montgomery_ladder` method * * The purpose of the above is to minimize the number of required range checks (vs a simple double and add algo). * - * When computing two iterations of the montgomery ladder algorithm, we can neglect computing the y-coordinate of - *the 1st ladder output. See `double_montgomery_ladder` for more details. + * When computing repeated iterations of the montgomery ladder algorithm, we can neglect computing the y-coordinate + *of each ladder output. See `multiple_montgomery_ladder` for more details. **/ for (size_t i = 1; i < num_rounds / 2; ++i) { // `nafs` tracks the naf value for each point for the current round @@ -365,14 +376,8 @@ element element::bn254_endo_batch_mul(const std::vec } element::chain_add_accumulator add_2 = point_table.get_chain_add_accumulator(nafs); - // Perform the double montgomery ladder. We need to convert our chain_add_accumulator types into regular - // elements if the accumuator does not contain a y-coordinate - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); - } + // Perform the double montgomery ladder. 
+ accumulator = accumulator.multiple_montgomery_ladder({ add_1, add_2 }); } // we need to iterate 1 more time if the number of rounds is even @@ -382,12 +387,7 @@ element element::bn254_endo_batch_mul(const std::vec nafs.emplace_back(naf_entries[j][num_rounds - 1]); } element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); - } + accumulator = accumulator.multiple_montgomery_ladder({ add_1 }); } /** diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp index 78d6f378554..bff1805db3c 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_impl.hpp @@ -80,6 +80,39 @@ element element::operator-(const element& other) con return element(x_3, y_3); } + +/** + * @brief Compute (*this) + other AND (*this) - other as a size-2 array + * + * @details We require this operation when computing biggroup lookup tables for + * multi-scalar-multiplication. 
This combined method reduces the number of + * field additions, field subtractions required (as well as 1 less assert_is_not_equal check) + * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param other + * @return std::array, 2> + */ +template +std::array, 2> element::add_sub(const element& other) const +{ + other.x.assert_is_not_equal(x); + + const Fq denominator = other.x - x; + const Fq x2x1 = -(other.x + x); + + const Fq lambda1 = Fq::div_without_denominator_check({ other.y, -y }, denominator); + const Fq x_3 = lambda1.sqradd({ x2x1 }); + const Fq y_3 = lambda1.madd(x - x_3, { -y }); + const Fq lambda2 = Fq::div_without_denominator_check({ -other.y, -y }, denominator); + const Fq x_4 = lambda2.sqradd({ x2x1 }); + const Fq y_4 = lambda2.madd(x - x_4, { -y }); + + return { element(x_3, y_3), element(x_4, y_4) }; +} + template element element::dbl() const { Fq two_x = x + x; @@ -294,200 +327,217 @@ element element::montgomery_ladder(const chain_add_a } /** - * Compute (4 * (*this)) + (2 * add1) + add2 - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction. + * @brief Compute 4.P + to_add[0] + ... + to_add[to_add.size() - 1] * - * Total number of field reductions = 9 + * @details Used in wnaf_batch_mul method. Combining operations requires fewer bigfield reductions. * - * Two calls to mont ladder woud require 10 + * Method computes R[i] = (2P + A[0]) + (2P + A[1]) + A[2] + ... + A[n-1] * - * Using doublings and additions would require 12! 
- **/ + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param to_add + * @return element + */ template -element element::double_montgomery_ladder(const element& add1, const element& add2) const +element element::quadruple_and_add(const std::vector& to_add) const { - add1.x.assert_is_not_equal(x); - const Fq lambda_1 = Fq::div_without_denominator_check({ add1.y, -y }, (add1.x - x)); - - const Fq x_3 = lambda_1.sqradd({ -add1.x, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder. - // Defining the quotient and remainder elements is the major cost of a non-native field multiplication - // because each requires ~256 bits of range checks - const Fq x_sub_x4 = x - x_4; - - const Fq x4_sub_add2x = x_4 - add2.x; + const Fq two_x = x + x; + Fq x_1; + Fq minus_lambda_dbl; + if constexpr (G::has_a) { + Fq a(get_context(), uint256_t(G::curve_a)); + minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), { a }); + x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + } else { + minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), {}); + x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + } - // msub_div; 'compute a multiplication and a division and multiply the two together. 
Requires only 1 non native - // field reduction` - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2 }, { (x_sub_x4) }, (x4_sub_add2x), { y, add2.y }); + ASSERT(to_add.size() > 0); + to_add[0].x.assert_is_not_equal(x_1); - // validate we can use incomplete addition formulae - x_4.assert_is_not_equal(add2.x); + const Fq x_minus_x_1 = x - x_1; - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x }); - const Fq x5_sub_x4 = x_5 - x_4; + const Fq lambda_1 = Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_1 - to_add[0].x), { to_add[0].y, y }); - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + const Fq x_3 = lambda_1.sqradd({ -to_add[0].x, -x_1 }); - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 + lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + const Fq half_minus_lambda_2_minus_lambda_1 = + Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_3 - x_1), { y }); - const Fq x6_sub_x4 = x_6 - x_4; + const Fq minus_lambda_2_minus_lambda_1 = half_minus_lambda_2_minus_lambda_1 + half_minus_lambda_2_minus_lambda_1; + const Fq minus_lambda_2 = minus_lambda_2_minus_lambda_1 + lambda_1; - // y_6 = -L_4 * (x_6 - x_4) - L_2 * (x - x_4) + y - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + const Fq x_4 = minus_lambda_2.sqradd({ -x_1, -x_3 }); - return element(x_6, y_6); -} + const Fq x_4_sub_x_1 = x_4 - x_1; -/** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction - * - **/ -template -element element::double_montgomery_ladder(const chain_add_accumulator& add1, - const element& add2) const -{ - if (add1.is_element) { - throw_or_abort("An accumulator expected"); + if (to_add.size() == 1) { + const Fq y_4 = Fq::dual_madd(minus_lambda_2, x_4_sub_x_1, minus_lambda_dbl, 
x_minus_x_1, { y }); + return element(x_4, y_4); } - add1.x3_prev.assert_is_not_equal(x); - Fq lambda_1 = Fq::msub_div( - { add1.lambda_prev }, { (add1.x1_prev - add1.x3_prev) }, (x - add1.x3_prev), { -add1.y1_prev, -y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x3_prev, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder, which is the major cost - // of a non-native field multiplication - const Fq x_sub_x4 = x - x_4; + to_add[1].x.assert_is_not_equal(to_add[0].x); - const Fq x4_sub_add2x = x_4 - add2.x; - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2 }, { (x_sub_x4) }, (x4_sub_add2x), { y, add2.y }); + Fq minus_lambda_3 = Fq::msub_div( + { minus_lambda_dbl, minus_lambda_2 }, { x_minus_x_1, x_4_sub_x_1 }, (x_4 - to_add[1].x), { y, -(to_add[1].y) }); - x_4.assert_is_not_equal(add2.x); + // X5 = L3.L3 - X4 - XB + const Fq x_5 = minus_lambda_3.sqradd({ -x_4, -to_add[1].x }); - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x }); - const Fq x5_sub_x4 = x_5 - x_4; + if (to_add.size() == 2) { + // Y5 = L3.(XB - X5) - YB + const Fq y_5 = minus_lambda_3.madd(x_5 - to_add[1].x, { -to_add[1].y }); + return element(x_5, y_5); + } - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + Fq x_prev = x_5; + Fq minus_lambda_prev = minus_lambda_3; - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 
+ lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + for (size_t i = 2; i < to_add.size(); ++i) { - const Fq x6_sub_x4 = x_6 - x_4; + to_add[i].x.assert_is_not_equal(to_add[i - 1].x); + // Lambda = Yprev - Yadd[i] / Xprev - Xadd[i] + // = -Lprev.(Xprev - Xadd[i-1]) - Yadd[i - 1] - Yadd[i] / Xprev - Xadd[i] + const Fq minus_lambda = Fq::msub_div({ minus_lambda_prev }, + { to_add[i - 1].x - x_prev }, + (to_add[i].x - x_prev), + { to_add[i - 1].y, to_add[i].y }); + // X = Lambda * Lambda - Xprev - Xadd[i] + const Fq x_out = minus_lambda.sqradd({ -x_prev, -to_add[i].x }); - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + x_prev = x_out; + minus_lambda_prev = minus_lambda; + } + const Fq y_out = minus_lambda_prev.madd(x_prev - to_add[to_add.size() - 1].x, { -to_add[to_add.size() - 1].y }); - return element(x_6, y_6); + return element(x_prev, y_out); } /** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction + * @brief Perform repeated iterations of the montgomery ladder algorithm. * - **/ + * For points P, Q, montgomery ladder computes R = (P + Q) + P + * i.e. it's "double-and-add" without explicit doublings. + * + * This method can apply repeated iterations of the montgomery ladder. + * Each iteration reduces the number of field multiplications by 1, at the cost of more additions. + * (i.e. we don't compute intermediate y-coordinates). + * + * The number of additions scales with the size of the input vector. The optimal input size appears to be 4. 
+ * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @param add + * @return element + */ template -element element::double_montgomery_ladder(const chain_add_accumulator& add1, - const chain_add_accumulator& add2) const +element element::multiple_montgomery_ladder( + const std::vector& add) const { - if ((add1.is_element) || (add2.is_element)) { - throw_or_abort("An accumulator expected"); - } - add1.x3_prev.assert_is_not_equal(x); - // add1.y = lambda_prev * (x1_prev - x3_prev) - y1_prev - Fq lambda_1 = Fq::msub_div( - { add1.lambda_prev }, { (add1.x1_prev - add1.x3_prev) }, (x - add1.x3_prev), { -add1.y1_prev, -y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x3_prev, -x }); - - const Fq minus_lambda_2 = - lambda_1 + Fq::div_without_denominator_check({ y + y }, (x_3 - x)); // (y + y) / (x_3 - x); - - const Fq x_4 = minus_lambda_2.sqradd({ -x, -x_3 }); - - // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where needed. - // This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a field division) - // with only one non-native field reduction. - // E.g. 
evaluating (a * b) + (c * d) = e mod p only requires 1 quotient and remainder, which is the major cost - // of a non-native field multiplication - const Fq x_sub_x4 = x - x_4; - - const Fq x4_sub_add2x = x_4 - add2.x3_prev; - - const Fq lambda_3 = Fq::msub_div({ minus_lambda_2, add2.lambda_prev }, - { (x_sub_x4), (add2.x1_prev - add2.x3_prev) }, - (x4_sub_add2x), - { y, -add2.y1_prev }); - - x_4.assert_is_not_equal(add2.x3_prev); - - const Fq x_5 = lambda_3.sqradd({ -x_4, -add2.x3_prev }); - const Fq x5_sub_x4 = x_5 - x_4; - - const Fq half_minus_lambda_4_minus_lambda_3 = Fq::msub_div({ minus_lambda_2 }, { x_sub_x4 }, (x5_sub_x4), { y }); + struct composite_y { + std::vector mul_left; + std::vector mul_right; + std::vector add; + bool is_negative = false; + }; + + Fq previous_x = x; + composite_y previous_y{ std::vector(), std::vector(), std::vector(), false }; + for (size_t i = 0; i < add.size(); ++i) { + previous_x.assert_is_not_equal(add[i].x3_prev); + + // composite_y add_y; + bool negate_add_y = (i > 0) && !previous_y.is_negative; + std::vector lambda1_left; + std::vector lambda1_right; + std::vector lambda1_add; + + if (i == 0) { + lambda1_add.emplace_back(-y); + } else { + lambda1_left = previous_y.mul_left; + lambda1_right = previous_y.mul_right; + lambda1_add = previous_y.add; + } - const Fq minus_lambda_4_minus_lambda_3 = half_minus_lambda_4_minus_lambda_3 + half_minus_lambda_4_minus_lambda_3; - const Fq minus_lambda_4 = minus_lambda_4_minus_lambda_3 + lambda_3; - const Fq x_6 = minus_lambda_4.sqradd({ -x_4, -x_5 }); + if (!add[i].is_element) { + lambda1_left.emplace_back(add[i].lambda_prev); + lambda1_right.emplace_back(negate_add_y ? add[i].x3_prev - add[i].x1_prev + : add[i].x1_prev - add[i].x3_prev); + lambda1_add.emplace_back(negate_add_y ? add[i].y1_prev : -add[i].y1_prev); + } else if (i > 0) { + lambda1_add.emplace_back(negate_add_y ? 
-add[i].y3_prev : add[i].y3_prev); + } + // if previous_y is negated then add stays positive + // if previous_y is positive then add stays negated + // | add.y is negated | previous_y is negated | output of msub_div is -lambda | + // | --- | --- | --- | + // | no | yes | yes | + // | yes | no | no | + + Fq lambda1; + if (!add[i].is_element || i > 0) { + bool flip_lambda1_denominator = !negate_add_y; + Fq denominator = flip_lambda1_denominator ? previous_x - add[i].x3_prev : add[i].x3_prev - previous_x; + lambda1 = Fq::msub_div(lambda1_left, lambda1_right, denominator, lambda1_add); + } else { + lambda1 = Fq::div_without_denominator_check({ add[i].y3_prev - y }, (add[i].x3_prev - x)); + } - const Fq x6_sub_x4 = x_6 - x_4; + Fq x_3 = lambda1.madd(lambda1, { -add[i].x3_prev, -previous_x }); - const Fq y_6 = Fq::dual_madd(minus_lambda_4, (x6_sub_x4), minus_lambda_2, x_sub_x4, { y }); + // We can avoid computing y_4, instead substituting the expression `minus_lambda_2 * (x_4 - x) - y` where + // needed. This is cheaper, because we can evaluate two field multiplications (or a field multiplication + a + // field division) with only one non-native field reduction. E.g. evaluating (a * b) + (c * d) = e mod p only + // requires 1 quotient and remainder, which is the major cost of a non-native field multiplication + Fq lambda2; + if (i == 0) { + lambda2 = Fq::div_without_denominator_check({ y + y }, (previous_x - x_3)) - lambda1; + } else { + Fq l2_denominator = previous_y.is_negative ? 
previous_x - x_3 : x_3 - previous_x; + Fq partial_lambda2 = + Fq::msub_div(previous_y.mul_left, previous_y.mul_right, l2_denominator, previous_y.add); + partial_lambda2 = partial_lambda2 + partial_lambda2; + lambda2 = partial_lambda2 - lambda1; + } - return element(x_6, y_6); -} -/** - * If we chain two iterations of the montgomery ladder together, we can squeeze out a non-native field reduction - **/ -template -element element::double_into_montgomery_ladder(const element& add1) const -{ - const Fq two_x = x + x; - Fq x_1; - Fq minus_lambda_dbl; - if constexpr (G::has_a) { - Fq a(get_context(), uint256_t(G::curve_a)); - minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), { a }); - x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); - } else { - minus_lambda_dbl = Fq::msub_div({ x }, { (two_x + x) }, (y + y), {}); - x_1 = minus_lambda_dbl.sqradd({ -(two_x) }); + Fq x_4 = lambda2.sqradd({ -x_3, -previous_x }); + composite_y y_4; + if (i == 0) { + // We want to make sure that at the final iteration, `y_previous.is_negative = false` + // Each iteration flips the sign of y_previous.is_negative. + // i.e. whether we store y_4 or -y_4 depends on the number of points we have + bool num_points_even = ((add.size() & 0x01UL) == 0); + y_4.add.emplace_back(num_points_even ? y : -y); + y_4.mul_left.emplace_back(lambda2); + y_4.mul_right.emplace_back(num_points_even ? x_4 - previous_x : previous_x - x_4); + y_4.is_negative = num_points_even; + } else { + y_4.is_negative = !previous_y.is_negative; + y_4.mul_left.emplace_back(lambda2); + y_4.mul_right.emplace_back(previous_y.is_negative ? previous_x - x_4 : x_4 - previous_x); + // append terms in previous_y to y_4. We want to make sure the terms above are added into the start of y_4. + // This is to ensure they are cached correctly when + // `composer::evaluate_partial_non_native_field_multiplication` is called. 
+ // (the 1st mul_left, mul_right elements will trigger composer::evaluate_non_native_field_multiplication + // when Fq::mult_madd is called - this term cannot be cached so we want to make sure it is unique) + std::copy(previous_y.mul_left.begin(), previous_y.mul_left.end(), std::back_inserter(y_4.mul_left)); + std::copy(previous_y.mul_right.begin(), previous_y.mul_right.end(), std::back_inserter(y_4.mul_right)); + std::copy(previous_y.add.begin(), previous_y.add.end(), std::back_inserter(y_4.add)); + } + previous_x = x_4; + previous_y = y_4; } + Fq x_out = previous_x; - add1.x.assert_is_not_equal(x_1); - - const Fq x_minus_x_1 = x - x_1; - const Fq lambda_1 = Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_1 - add1.x), { add1.y, y }); - - const Fq x_3 = lambda_1.sqradd({ -add1.x, -x_1 }); - const Fq half_minus_lambda_2_minus_lambda_1 = - Fq::msub_div({ minus_lambda_dbl }, { x_minus_x_1 }, (x_3 - x_1), { y }); - const Fq minus_lambda_2_minus_lambda_1 = half_minus_lambda_2_minus_lambda_1 + half_minus_lambda_2_minus_lambda_1; - const Fq minus_lambda_2 = minus_lambda_2_minus_lambda_1 + lambda_1; - - const Fq x_4 = minus_lambda_2.sqradd({ -x_1, -x_3 }); - - const Fq y_4 = Fq::dual_madd(minus_lambda_2, (x_4 - x_1), minus_lambda_dbl, x_minus_x_1, { y }); + ASSERT(!previous_y.is_negative); - return element(x_4, y_4); + Fq y_out = Fq::mult_madd(previous_y.mul_left, previous_y.mul_right, previous_y.add); + return element(x_out, y_out); } /** @@ -551,6 +601,7 @@ element element::batch_mul(const std::vector& scalars, const size_t max_num_bits) { + const size_t num_points = points.size(); ASSERT(scalars.size() == num_points); batch_lookup_table point_table(points); @@ -563,38 +614,25 @@ element element::batch_mul(const std::vector> nafs; - for (size_t j = 0; j < num_points; ++j) { - nafs.emplace_back(naf_entries[j][i * 2 - 1]); - } - element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - for (size_t j = 0; j < num_points; ++j) { - nafs[j] 
= (naf_entries[j][i * 2]); - } - element::chain_add_accumulator add_2 = point_table.get_chain_add_accumulator(nafs); - if (!add_1.is_element) { - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - } else { - accumulator = accumulator.double_montgomery_ladder(element(add_1.x3_prev, add_1.y3_prev), - element(add_2.x3_prev, add_2.y3_prev)); + constexpr size_t num_rounds_per_iteration = 4; + size_t num_iterations = num_rounds / num_rounds_per_iteration; + num_iterations += ((num_iterations * num_rounds_per_iteration) == num_rounds) ? 0 : 1; + const size_t num_rounds_per_final_iteration = (num_rounds - 1) - ((num_iterations - 1) * num_rounds_per_iteration); + for (size_t i = 0; i < num_iterations; ++i) { + + std::vector> nafs(num_points); + std::vector to_add; + const size_t inner_num_rounds = + (i != num_iterations - 1) ? num_rounds_per_iteration : num_rounds_per_final_iteration; + for (size_t j = 0; j < inner_num_rounds; ++j) { + for (size_t k = 0; k < num_points; ++k) { + nafs[k] = (naf_entries[k][i * num_rounds_per_iteration + j + 1]); + } + to_add.emplace_back(point_table.get_chain_add_accumulator(nafs)); } + accumulator = accumulator.multiple_montgomery_ladder(to_add); } - if ((num_rounds & 0x01ULL) == 0x00ULL) { - std::vector> nafs; - for (size_t j = 0; j < points.size(); ++j) { - nafs.emplace_back(naf_entries[j][num_rounds - 1]); - } - element::chain_add_accumulator add_1 = point_table.get_chain_add_accumulator(nafs); - if (add_1.is_element) { - element temp(add_1.x3_prev, add_1.y3_prev); - accumulator = accumulator.montgomery_ladder(temp); - } else { - accumulator = accumulator.montgomery_ladder(add_1); - } - } - for (size_t i = 0; i < num_points; ++i) { element skew = accumulator - points[i]; Fq out_x = accumulator.x.conditional_select(skew.x, naf_entries[i][num_rounds]); diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp index 
63257d56873..49e43ba6e9c 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_nafs.hpp @@ -243,9 +243,15 @@ typename element::secp256k1_wnaf_pair element::compu // Compute and constrain skews field_t negative_skew = witness_t(ctx, is_negative ? 0 : skew); field_t positive_skew = witness_t(ctx, is_negative ? skew : 0); - negative_skew.create_range_constraint(1); - positive_skew.create_range_constraint(1); - (negative_skew + positive_skew).create_range_constraint(1); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint(negative_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_new_range_constraint(positive_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_new_range_constraint((negative_skew + positive_skew).witness_index, 1, "biggroup_nafs"); + } else { + ctx->create_range_constraint(negative_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_range_constraint(positive_skew.witness_index, 1, "biggroup_nafs"); + ctx->create_range_constraint((negative_skew + positive_skew).witness_index, 1, "biggroup_nafs"); + } const auto reconstruct_bigfield_from_wnaf = [ctx](const std::vector>& wnaf, const field_t& positive_skew, @@ -378,14 +384,21 @@ std::vector> element::compute_wnaf(const Fr& scalar) offset_entry = (1ULL << (WNAF_SIZE - 1)) - 1 - (wnaf_values[i] & 0xffffff); } field_t entry(witness_t(ctx, offset_entry)); - - entry.create_range_constraint(WNAF_SIZE); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint(entry.witness_index, 1ULL << (WNAF_SIZE), "biggroup_nafs"); + } else { + ctx->create_range_constraint(entry.witness_index, WNAF_SIZE, "biggroup_nafs"); + } wnaf_entries.emplace_back(entry); } // add skew wnaf_entries.emplace_back(witness_t(ctx, skew)); - wnaf_entries[wnaf_entries.size() - 1].create_range_constraint(1); + if constexpr (C::type == ComposerType::PLOOKUP) { + 
ctx->create_new_range_constraint(wnaf_entries[wnaf_entries.size() - 1].witness_index, 1, "biggroup_nafs"); + } else { + ctx->create_range_constraint(wnaf_entries[wnaf_entries.size() - 1].witness_index, 1, "biggroup_nafs"); + } // TODO: VALIDATE SUM DOES NOT OVERFLOW P @@ -494,15 +507,25 @@ std::vector> element::compute_naf(const Fr& scalar, cons bit.context = ctx; bit.witness_index = witness_t(ctx, true).witness_index; // flip sign bit.witness_bool = true; - ctx->create_range_constraint( - bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + } else { + ctx->create_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in non-next_entry case"); + } naf_entries[num_rounds - i - 1] = bit; } else { bool_t bit(ctx, false); bit.witness_index = witness_t(ctx, false).witness_index; // don't flip sign bit.witness_bool = false; - ctx->create_range_constraint( - bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + if constexpr (C::type == ComposerType::PLOOKUP) { + ctx->create_new_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + } else { + ctx->create_range_constraint( + bit.witness_index, 1, "biggroup_nafs: compute_naf extracted too many bits in next_entry case"); + } naf_entries[num_rounds - i - 1] = bit; } } diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp index 7d510e2794a..756f955a7a1 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_secp256k1.hpp @@ -86,15 +86,18 @@ element 
element::secp256k1_ecdsa_mul(const element& // See `stdlib/memory/rom_table.hpp` for how indirect array accesses are implemented in UltraPlonk const auto& add_1 = endoP2_table[u2_hi_wnaf.wnaf[2 * i]]; const auto& add_2 = P2_table[u2_lo_wnaf.wnaf[2 * i + 1]]; - accumulator = accumulator.double_montgomery_ladder(add_1, add_2); - const auto& add_3 = endoP1_table[u1_hi_wnaf.wnaf[i]]; const auto& add_4 = P1_table[u1_lo_wnaf.wnaf[i]]; - accumulator = accumulator.double_montgomery_ladder(add_3, add_4); - const auto& add_5 = endoP2_table[u2_hi_wnaf.wnaf[2 * i + 1]]; const auto& add_6 = P2_table[u2_lo_wnaf.wnaf[2 * i + 2]]; - accumulator = accumulator.double_montgomery_ladder(add_5, add_6); + + accumulator = accumulator.multiple_montgomery_ladder({ element::chain_add_accumulator(add_1), + element::chain_add_accumulator(add_2), + element::chain_add_accumulator(add_3) }); + + accumulator = accumulator.multiple_montgomery_ladder({ element::chain_add_accumulator(add_4), + element::chain_add_accumulator(add_5), + element::chain_add_accumulator(add_6) }); } /** diff --git a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp index 9098799a7b1..7f31017f670 100644 --- a/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp +++ b/cpp/src/barretenberg/stdlib/primitives/biggroup/biggroup_tables.hpp @@ -4,10 +4,27 @@ namespace proof_system::plonk { namespace stdlib { using plookup::MultiTableId; + +/** + * @brief Constructs a ROM table to look up linear combinations of group elements + * + * @tparam C + * @tparam Fq + * @tparam Fr + * @tparam G + * @tparam num_elements + * @tparam typename + * @param rom_data the ROM table we are writing into + * @param limb_max the maximum size of each limb in the ROM table. + * + * @details When reading a group element *out* of the ROM table, we must know the maximum value of each coordinate's + * limbs. 
We take this value to be the maximum of the maximum values of the input limbs into the table! + * @return std::array, 5> + */ template template std::array, 5> element::create_group_element_rom_tables( - const std::array& rom_data) + const std::array& rom_data, std::array& limb_max) { std::vector, 2>> x_lo_limbs; std::vector, 2>> x_hi_limbs; @@ -16,6 +33,15 @@ std::array, 5> element::create_group_element_rom std::vector, 2>> prime_limbs; for (size_t i = 0; i < num_elements; ++i) { + limb_max[0] = std::max(limb_max[0], rom_data[i].x.binary_basis_limbs[0].maximum_value); + limb_max[1] = std::max(limb_max[1], rom_data[i].x.binary_basis_limbs[1].maximum_value); + limb_max[2] = std::max(limb_max[2], rom_data[i].x.binary_basis_limbs[2].maximum_value); + limb_max[3] = std::max(limb_max[3], rom_data[i].x.binary_basis_limbs[3].maximum_value); + limb_max[4] = std::max(limb_max[4], rom_data[i].y.binary_basis_limbs[0].maximum_value); + limb_max[5] = std::max(limb_max[5], rom_data[i].y.binary_basis_limbs[1].maximum_value); + limb_max[6] = std::max(limb_max[6], rom_data[i].y.binary_basis_limbs[2].maximum_value); + limb_max[7] = std::max(limb_max[7], rom_data[i].y.binary_basis_limbs[3].maximum_value); + x_lo_limbs.emplace_back(std::array, 2>{ rom_data[i].x.binary_basis_limbs[0].element, rom_data[i].x.binary_basis_limbs[1].element }); x_hi_limbs.emplace_back(std::array, 2>{ rom_data[i].x.binary_basis_limbs[2].element, @@ -39,7 +65,7 @@ std::array, 5> element::create_group_element_rom template template element element::read_group_element_rom_tables( - const std::array, 5>& tables, const field_t& index) + const std::array, 5>& tables, const field_t& index, const std::array& limb_max) { const auto xlo = tables[0][index]; const auto xhi = tables[1][index]; @@ -49,6 +75,15 @@ element element::read_group_element_rom_tables( Fq x_fq(xlo[0], xlo[1], xhi[0], xhi[1], xyprime[0]); Fq y_fq(ylo[0], ylo[1], yhi[0], yhi[1], xyprime[1]); + x_fq.binary_basis_limbs[0].maximum_value = limb_max[0]; + 
x_fq.binary_basis_limbs[1].maximum_value = limb_max[1]; + x_fq.binary_basis_limbs[2].maximum_value = limb_max[2]; + x_fq.binary_basis_limbs[3].maximum_value = limb_max[3]; + y_fq.binary_basis_limbs[0].maximum_value = limb_max[4]; + y_fq.binary_basis_limbs[1].maximum_value = limb_max[5]; + y_fq.binary_basis_limbs[2].maximum_value = limb_max[6]; + y_fq.binary_basis_limbs[3].maximum_value = limb_max[7]; + const auto output = element(x_fq, y_fq); return output; } @@ -64,17 +99,17 @@ element::four_bit_table_plookup::four_bit_table_plookup(const e element_table[i] = element_table[i - 1] + d2; } for (size_t i = 0; i < 8; ++i) { - element_table[i] = (-element_table[15 - i]).reduce(); + element_table[i] = (-element_table[15 - i]); } - coordinates = create_group_element_rom_tables<16>(element_table); + coordinates = create_group_element_rom_tables<16>(element_table, limb_max); } template template element element::four_bit_table_plookup::operator[](const field_t& index) const { - return read_group_element_rom_tables<16>(coordinates, index); + return read_group_element_rom_tables<16>(coordinates, index, limb_max); } template @@ -146,109 +181,134 @@ template element::lookup_table_plookup::lookup_table_plookup(const std::array& inputs) { if constexpr (length == 2) { - element_table[0] = inputs[1] + inputs[0]; - element_table[1] = inputs[1] - inputs[0]; + auto [A0, A1] = inputs[1].add_sub(inputs[0]); + element_table[0] = A0; + element_table[1] = A1; } else if constexpr (length == 3) { - element R0 = inputs[1] + inputs[0]; - element R1 = inputs[1] - inputs[0]; - element_table[0] = inputs[2] + R0; // C + B + A - element_table[1] = inputs[2] + R1; // C + B - A - element_table[2] = inputs[2] - R1; // C - B + A - element_table[3] = inputs[2] - R0; // C - B - A - } else if constexpr (length == 4) { - element T0 = inputs[1] + inputs[0]; - element T1 = inputs[1] - inputs[0]; - element T2 = inputs[3] + inputs[2]; - element T3 = inputs[3] - inputs[2]; + auto [R0, R1] = 
inputs[1].add_sub(inputs[0]); // B ± A - element_table[0] = T2 + T0; // D + C + B + A - element_table[1] = T2 + T1; // D + C + B - A - element_table[2] = T2 - T1; // D + C - B + A - element_table[3] = T2 - T0; // D + C - B - A - element_table[4] = T3 + T0; // D - C + B + A - element_table[5] = T3 + T1; // D - C + B - A - element_table[6] = T3 - T1; // D - C - B + A - element_table[7] = T3 - T0; // D - C - B - A - } else if constexpr (length == 5) { - element A0 = inputs[1] + inputs[0]; // B + A - element A1 = inputs[1] - inputs[0]; // B - A + auto [T0, T1] = inputs[2].add_sub(R0); // C ± (B + A) + auto [T2, T3] = inputs[2].add_sub(R1); // C ± (B - A) - element T2 = inputs[3] + inputs[2]; // D + C - element T3 = inputs[3] - inputs[2]; // D - C - - element E0 = inputs[4] + T2; // E + D + C // 0 0 0 - element E1 = inputs[4] + T3; // E + D - C // 0 0 1 - element E2 = inputs[4] - T3; // E - D + C // 0 1 0 - element E3 = inputs[4] - T2; // E - D - C // 0 1 1 - - element_table[0] = E0 + A0; // E + D + C + B + A // 0 0 0 0 0 - element_table[1] = E0 + A1; // E + D + C + B - A // 0 0 0 0 1 - element_table[2] = E0 - A1; // E + D + C - B + A // 0 0 0 1 0 - element_table[3] = E0 - A0; // E + D + C - B - A // 0 0 0 1 1 - element_table[4] = E1 + A0; // E + D - C + B + A // 0 0 1 0 0 - element_table[5] = E1 + A1; // E + D - C + B - A // 0 0 1 0 1 - element_table[6] = E1 - A1; // E + D - C - B + A // 0 0 1 1 0 - element_table[7] = E1 - A0; // E + D - C - B - A // 0 0 1 1 1 - element_table[8] = E2 + A0; // E - D + C + B + A // 0 1 0 0 0 - element_table[9] = E2 + A1; // E - D + C + B - A // 0 1 0 0 1 - element_table[10] = E2 - A1; // E - D + C - B + A // 0 1 0 1 0 - element_table[11] = E2 - A0; // E - D - C - B - A // 0 1 0 1 1 - element_table[12] = E3 + A0; // E - D - C + B + A // 0 1 1 0 0 - element_table[13] = E3 + A1; // E - D - C + B - A // 0 1 1 0 1 - element_table[14] = E3 - A1; // E - D - C - B + A // 0 1 1 1 0 - element_table[15] = E3 - A0; // E - D - C - B - A // 0 1 1 1 1 
+ element_table[0] = T0; + element_table[1] = T2; + element_table[2] = T3; + element_table[3] = T1; + } else if constexpr (length == 4) { + auto [T0, T1] = inputs[1].add_sub(inputs[0]); // B ± A + auto [T2, T3] = inputs[3].add_sub(inputs[2]); // D ± C + + auto [F0, F3] = T2.add_sub(T0); // (D + C) ± (B + A) + auto [F1, F2] = T2.add_sub(T1); // (D + C) ± (B - A) + auto [F4, F7] = T3.add_sub(T0); // (D - C) ± (B + A) + auto [F5, F6] = T3.add_sub(T1); // (D - C) ± (B - A) + + element_table[0] = F0; + element_table[1] = F1; + element_table[2] = F2; + element_table[3] = F3; + element_table[4] = F4; + element_table[5] = F5; + element_table[6] = F6; + element_table[7] = F7; + } else if constexpr (length == 5) { + auto [A0, A1] = inputs[1].add_sub(inputs[0]); // B ± A + auto [T2, T3] = inputs[3].add_sub(inputs[2]); // D ± C + + auto [E0, E3] = inputs[4].add_sub(T2); // E ± (D + C) + auto [E1, E2] = inputs[4].add_sub(T3); // E ± (D - C) + + auto [F0, F3] = E0.add_sub(A0); + auto [F1, F2] = E0.add_sub(A1); + auto [F4, F7] = E1.add_sub(A0); + auto [F5, F6] = E1.add_sub(A1); + auto [F8, F11] = E2.add_sub(A0); + auto [F9, F10] = E2.add_sub(A1); + auto [F12, F15] = E3.add_sub(A0); + auto [F13, F14] = E3.add_sub(A1); + + element_table[0] = F0; + element_table[1] = F1; + element_table[2] = F2; + element_table[3] = F3; + element_table[4] = F4; + element_table[5] = F5; + element_table[6] = F6; + element_table[7] = F7; + element_table[8] = F8; + element_table[9] = F9; + element_table[10] = F10; + element_table[11] = F11; + element_table[12] = F12; + element_table[13] = F13; + element_table[14] = F14; + element_table[15] = F15; } else if constexpr (length == 6) { // 44 adds! 
Only use this if it saves us adding another table to a multi-scalar-multiplication - element A0 = inputs[1] + inputs[0]; // B + A - element A1 = inputs[1] - inputs[0]; // B - A - element E0 = inputs[4] + inputs[3]; // E + D - element E1 = inputs[4] - inputs[3]; // E - D - - element C0 = inputs[2] + A0; // C + B + A - element C1 = inputs[2] + A1; // C + B - A - element C2 = inputs[2] - A1; // C - B + A - element C3 = inputs[2] - A0; // C - B - A - - element F0 = inputs[5] + E0; // F + E + D - element F1 = inputs[5] + E1; // F + E - D - element F2 = inputs[5] - E1; // F - E + D - element F3 = inputs[5] - E0; // F - E - E - - element_table[0] = F0 + C0; - element_table[1] = F0 + C1; - element_table[2] = F0 + C2; - element_table[3] = F0 + C3; - element_table[4] = F0 - C3; - element_table[5] = F0 - C2; - element_table[6] = F0 - C1; - element_table[7] = F0 - C0; - - element_table[8] = F1 + C0; - element_table[9] = F1 + C1; - element_table[10] = F1 + C2; - element_table[11] = F1 + C3; - element_table[12] = F1 - C3; - element_table[13] = F1 - C2; - element_table[14] = F1 - C1; - element_table[15] = F1 - C0; - - element_table[16] = F2 + C0; - element_table[17] = F2 + C1; - element_table[18] = F2 + C2; - element_table[19] = F2 + C3; - element_table[20] = F2 - C3; - element_table[21] = F2 - C2; - element_table[22] = F2 - C1; - element_table[23] = F2 - C0; - - element_table[24] = F3 + C0; - element_table[25] = F3 + C1; - element_table[26] = F3 + C2; - element_table[27] = F3 + C3; - element_table[28] = F3 - C3; - element_table[29] = F3 - C2; - element_table[30] = F3 - C1; - element_table[31] = F3 - C0; + + auto [A0, A1] = inputs[1].add_sub(inputs[0]); + auto [E0, E1] = inputs[4].add_sub(inputs[3]); + auto [C0, C3] = inputs[2].add_sub(A0); + auto [C1, C2] = inputs[2].add_sub(A1); + + auto [F0, F3] = inputs[5].add_sub(E0); + auto [F1, F2] = inputs[5].add_sub(E1); + + auto [R0, R7] = F0.add_sub(C0); + auto [R1, R6] = F0.add_sub(C1); + auto [R2, R5] = F0.add_sub(C2); + auto [R3, 
R4] = F0.add_sub(C3); + + auto [S0, S7] = F1.add_sub(C0); + auto [S1, S6] = F1.add_sub(C1); + auto [S2, S5] = F1.add_sub(C2); + auto [S3, S4] = F1.add_sub(C3); + + auto [U0, U7] = F2.add_sub(C0); + auto [U1, U6] = F2.add_sub(C1); + auto [U2, U5] = F2.add_sub(C2); + auto [U3, U4] = F2.add_sub(C3); + + auto [W0, W7] = F3.add_sub(C0); + auto [W1, W6] = F3.add_sub(C1); + auto [W2, W5] = F3.add_sub(C2); + auto [W3, W4] = F3.add_sub(C3); + + element_table[0] = R0; + element_table[1] = R1; + element_table[2] = R2; + element_table[3] = R3; + element_table[4] = R4; + element_table[5] = R5; + element_table[6] = R6; + element_table[7] = R7; + + element_table[8] = S0; + element_table[9] = S1; + element_table[10] = S2; + element_table[11] = S3; + element_table[12] = S4; + element_table[13] = S5; + element_table[14] = S6; + element_table[15] = S7; + + element_table[16] = U0; + element_table[17] = U1; + element_table[18] = U2; + element_table[19] = U3; + element_table[20] = U4; + element_table[21] = U5; + element_table[22] = U6; + element_table[23] = U7; + + element_table[24] = W0; + element_table[25] = W1; + element_table[26] = W2; + element_table[27] = W3; + element_table[28] = W4; + element_table[29] = W5; + element_table[30] = W6; + element_table[31] = W7; } else if constexpr (length == 7) { // 82 adds! This one is not worth using... 
@@ -341,9 +401,9 @@ element::lookup_table_plookup::lookup_table_plookup(con element_table[63] = G3 - E0; } for (size_t i = 0; i < table_size / 2; ++i) { - element_table[i + table_size / 2] = (-element_table[table_size / 2 - 1 - i]).reduce(); + element_table[i + table_size / 2] = (-element_table[table_size / 2 - 1 - i]); } - coordinates = create_group_element_rom_tables(element_table); + coordinates = create_group_element_rom_tables(element_table, limb_max); } template @@ -356,7 +416,7 @@ element element::lookup_table_plookup::ge accumulators.emplace_back(field_t(bits[i]) * (1ULL << i)); } field_t index = field_t::accumulate(accumulators); - return read_group_element_rom_tables(coordinates, index); + return read_group_element_rom_tables(coordinates, index, limb_max); } /** diff --git a/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp b/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp index 64d7e69fc8f..005ce5aec21 100644 --- a/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/aggregation_state/aggregation_state.hpp @@ -28,9 +28,45 @@ template struct aggregation_state { // has_data == other.has_data; can't compare as native }; + /** + * @brief TODO(@dbanks12 please migrate A3 circuits to using `assign_object_to_proof_outputs`. Much safer to not + * independently track `proof_witness_indices` and whether object has been assigned to public inputs) + * + */ void add_proof_outputs_as_public_inputs() { - ASSERT(proof_witness_indices.size() > 0); + auto* context = P0.get_context(); + context->add_recursive_proof(proof_witness_indices); + } + + void assign_object_to_proof_outputs() + { + if (proof_witness_indices.size() == 0) { + std::cerr << "warning. 
calling `add_proof_outputs_as_public_inputs`, but aggregation object already has " + "assigned proof outputs to public inputs."; + return; + } + + P0 = P0.reduce(); + P1 = P1.reduce(); + proof_witness_indices = { + P0.x.binary_basis_limbs[0].element.normalize().witness_index, + P0.x.binary_basis_limbs[1].element.normalize().witness_index, + P0.x.binary_basis_limbs[2].element.normalize().witness_index, + P0.x.binary_basis_limbs[3].element.normalize().witness_index, + P0.y.binary_basis_limbs[0].element.normalize().witness_index, + P0.y.binary_basis_limbs[1].element.normalize().witness_index, + P0.y.binary_basis_limbs[2].element.normalize().witness_index, + P0.y.binary_basis_limbs[3].element.normalize().witness_index, + P1.x.binary_basis_limbs[0].element.normalize().witness_index, + P1.x.binary_basis_limbs[1].element.normalize().witness_index, + P1.x.binary_basis_limbs[2].element.normalize().witness_index, + P1.x.binary_basis_limbs[3].element.normalize().witness_index, + P1.y.binary_basis_limbs[0].element.normalize().witness_index, + P1.y.binary_basis_limbs[1].element.normalize().witness_index, + P1.y.binary_basis_limbs[2].element.normalize().witness_index, + P1.y.binary_basis_limbs[3].element.normalize().witness_index, + }; auto* context = P0.get_context(); diff --git a/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp b/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp index 5ca0439d9c0..9db2f12dc9e 100644 --- a/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/transcript/transcript.hpp @@ -12,10 +12,11 @@ #include "../../commitment/pedersen/pedersen_plookup.hpp" #include "../../primitives/bigfield/bigfield.hpp" #include "../../primitives/biggroup/biggroup.hpp" -#include "../../primitives/bool/bool.hpp" #include "../../primitives/field/field.hpp" #include "../../primitives/witness/witness.hpp" +#include "../../primitives/bool/bool.hpp" +#include 
"../verification_key//verification_key.hpp" namespace proof_system::plonk { namespace stdlib { namespace recursion { @@ -133,196 +134,140 @@ template class Transcript { ++current_round; return; } - const size_t bytes_per_element = 31; - - // split element into 2 limbs and insert into element_buffer - // each entry in element_buffer is 31 bytes - const auto split = [&](field_pt& work_element, - std::vector& element_buffer, - const field_pt& element, - size_t& current_byte_counter, - const size_t num_bytes) { - uint256_t element_u256(element.get_value()); - size_t hi_bytes = bytes_per_element - current_byte_counter; - if (hi_bytes >= num_bytes) { - // hmm - size_t new_byte_counter = current_byte_counter + num_bytes; - field_pt hi = element; - const size_t leftovers = bytes_per_element - new_byte_counter; - field_pt buffer_shift = - field_pt(context, barretenberg::fr(uint256_t(1) << ((uint64_t)leftovers * 8ULL))); - work_element = work_element + (hi * buffer_shift); - work_element = work_element.normalize(); - current_byte_counter = new_byte_counter; - if (current_byte_counter == bytes_per_element) { - current_byte_counter = 0; - element_buffer.push_back(work_element); - work_element = field_pt(context, barretenberg::fr(0)); - } - return; - } - const size_t lo_bytes = num_bytes - hi_bytes; - field_pt lo = witness_t(context, barretenberg::fr(element_u256.slice(0, lo_bytes * 8))); - field_pt hi = witness_t(context, barretenberg::fr(element_u256.slice(lo_bytes * 8, 256))); - lo.create_range_constraint(lo_bytes * 8); - hi.create_range_constraint(hi_bytes * 8); - field_pt shift(context, barretenberg::fr(uint256_t(1ULL) << (uint64_t)lo_bytes * 8ULL)); - field_pt sum = lo + (hi * shift); - if (!element.is_constant() || !sum.is_constant()) { - sum.assert_equal(element); - } - current_byte_counter = (current_byte_counter + num_bytes) % bytes_per_element; - - // if current_byte_counter == 0 we've rolled over - if (current_byte_counter == 0) { - 
element_buffer.push_back(work_element + hi); - element_buffer.push_back(lo); - work_element = field_pt(context, 0); - } else { - work_element = work_element + hi; - - element_buffer.push_back(work_element); - - field_t lo_shift( - context, barretenberg::fr(uint256_t(1ULL) << ((31ULL - (uint64_t)current_byte_counter) * 8ULL))); - work_element = (lo * lo_shift); - work_element = work_element.normalize(); - } - }; - - std::vector compression_buffer; field_pt working_element(context); - size_t byte_counter = 0; + // maximum number of bytes we can store in a field element w/o wrapping modulus is 31. + // while we could store more *bits*, we want `preimage_buffer` to mirror how data is formatted + // when we serialize field/group elements natively (i.e. a byte array) + static constexpr size_t NUM_BITS_PER_PREIMAGE_ELEMENT = 31UL * 8UL; + PedersenPreimageBuilder preimage_buffer(context); if (current_round > 0) { - split(working_element, compression_buffer, field_pt(current_challenge), byte_counter, 32); + preimage_buffer.add_element(current_challenge); } for (auto manifest_element : get_manifest().get_round_manifest(current_round).elements) { if (manifest_element.num_bytes == 32 && manifest_element.name != "public_inputs") { - split(working_element, - compression_buffer, - get_field_element(manifest_element.name), - byte_counter, - manifest_element.num_bytes); + preimage_buffer.add_element(get_field_element(manifest_element.name)); } else if (manifest_element.num_bytes == 64 && manifest_element.name != "public_inputs") { group_pt point = get_circuit_group_element(manifest_element.name); - field_pt y_hi = - point.y.binary_basis_limbs[2].element + (point.y.binary_basis_limbs[3].element * fq_pt::shift_1); - field_pt y_lo = - point.y.binary_basis_limbs[0].element + (point.y.binary_basis_limbs[1].element * fq_pt::shift_1); - field_pt x_hi = - point.x.binary_basis_limbs[2].element + (point.x.binary_basis_limbs[3].element * fq_pt::shift_1); - field_pt x_lo = - 
point.x.binary_basis_limbs[0].element + (point.x.binary_basis_limbs[1].element * fq_pt::shift_1); - const size_t lo_bytes = fq_pt::NUM_LIMB_BITS / 4; - const size_t hi_bytes = 32 - lo_bytes; - - split(working_element, compression_buffer, y_hi, byte_counter, hi_bytes); - split(working_element, compression_buffer, y_lo, byte_counter, lo_bytes); - split(working_element, compression_buffer, x_hi, byte_counter, hi_bytes); - split(working_element, compression_buffer, x_lo, byte_counter, lo_bytes); + // In our buffer, we want to represent each field element as occupying 256 bits of data (to match what + // the native transcript does) + const auto& x = point.x; + const auto& y = point.y; + constexpr size_t last_limb_bits = 256 - (fq_pt::NUM_LIMB_BITS * 3); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[3].element, + last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[2].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[1].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[0].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[3].element, + last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[2].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[1].element, + fq_pt::NUM_LIMB_BITS); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[0].element, + fq_pt::NUM_LIMB_BITS); + } else if (manifest_element.name == "public_inputs") { std::vector field_array = get_field_element_vector(manifest_element.name); for (size_t i = 0; i < field_array.size(); ++i) { - split(working_element, compression_buffer, field_array[i], byte_counter, 32); + 
preimage_buffer.add_element(field_array[i]); } } else if (manifest_element.num_bytes < 32 && manifest_element.name != "public_inputs") { - split(working_element, - compression_buffer, - get_field_element(manifest_element.name), - byte_counter, - manifest_element.num_bytes); + // TODO(zac): init round data is being grabbed out of the manifest and not the vkey + preimage_buffer.add_element_with_existing_range_constraint(get_field_element(manifest_element.name), + manifest_element.num_bytes * 8); } } - std::vector> round_challenges; + std::vector round_challenges_new; - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) * 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); + field_pt T0; + T0 = preimage_buffer.compress(0); - compression_buffer.push_back(working_element); - } + // helper method to slice a challenge into 128-bit slices + const auto slice_into_halves = [&](const field_pt& in, const size_t low_bits = 128) { + uint256_t v = in.get_value(); + uint256_t lo = v.slice(0, low_bits); + uint256_t hi = v.slice(low_bits, 256); - field_pt T0; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - T0 = stdlib::pedersen_plookup_commitment::compress(compression_buffer); - } else { - T0 = stdlib::pedersen_commitment::compress(compression_buffer); - } - byte_array compressed_buffer(T0); + field_pt y_lo = field_pt::from_witness(context, lo); + field_pt y_hi = field_pt::from_witness(context, hi); - // TODO(@zac-williamson) make this a Poseidon hash - byte_array base_hash; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - std::vector compression_buffer; - field_pt working_element(context); - size_t byte_counter = 0; - split(working_element, compression_buffer, field_pt(compressed_buffer), byte_counter, 32); - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) 
* 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); - compression_buffer.push_back(working_element); + y_lo.create_range_constraint(low_bits); + y_hi.create_range_constraint(254 - low_bits); + + in.add_two(-y_lo, -y_hi * (uint256_t(1) << low_bits)).assert_equal(0); + + // Validate the sum of our two halves does not exceed the circuit modulus over the integers + constexpr uint256_t modulus = fr::modulus; + const field_pt r_lo = field_pt(context, modulus.slice(0, low_bits)); + const field_pt r_hi = field_pt(context, modulus.slice(low_bits, 256)); + + bool need_borrow = (uint256_t(y_lo.get_value()) > uint256_t(r_lo.get_value())); + field_pt borrow = field_pt::from_witness(context, need_borrow); + + // directly call `create_new_range_constraint` to avoid creating an arithmetic gate + if constexpr (Composer::type == ComposerType::PLOOKUP) { + context->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } else { + context->create_range_constraint(borrow.get_witness_index(), 1, "borrow"); } - base_hash = stdlib::pedersen_plookup_commitment::compress(compression_buffer); + + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_pt res_hi = (r_hi - y_hi) - borrow; + field_pt res_lo = (r_lo - y_lo) + (borrow * (uint256_t(1) << low_bits)); + + res_hi.create_range_constraint(modulus.get_msb() + 1 - low_bits); + res_lo.create_range_constraint(low_bits); + + return std::array{ y_lo, y_hi }; + }; + + field_pt base_hash; + if constexpr (Composer::type == ComposerType::PLOOKUP) { + base_hash = stdlib::pedersen_plookup_commitment::compress(std::vector{ T0 }, 0); } else { - base_hash = stdlib::blake3s(compressed_buffer); + base_hash = stdlib::pedersen_commitment::compress(std::vector{ T0 }, 0); } - byte_array first(field_pt(0), 16); - first.write(base_hash.slice(0, 16)); - round_challenges.push_back(first); + auto hash_halves = 
slice_into_halves(base_hash); + round_challenges_new.push_back(hash_halves[1]); if (num_challenges > 1) { - byte_array second(field_pt(0), 16); - second.write(base_hash.slice(16, 16)); - round_challenges.push_back(second); + round_challenges_new.push_back(hash_halves[0]); } + base_hash = (slice_into_halves(base_hash, 8)[1] * 256).normalize(); - // This block of code only executes for num_challenges > 2, which (currently) only happens in the nu round when - // we need to generate short scalars. In this case, we generate 32-byte challenges and split them in half to get - // the relevant challenges. + // This block of code only executes for num_challenges > 2, which (currently) only happens in the nu round + // when we need to generate short scalars. In this case, we generate 32-byte challenges and split them in + // half to get the relevant challenges. for (size_t i = 2; i < num_challenges; i += 2) { - byte_array rolling_buffer = base_hash; - byte_array hash_output; + // TODO(@zac-williamson) make this a Poseidon hash not a Pedersen hash + field_pt hash_output; if constexpr (Composer::type == ComposerType::PLOOKUP) { - // TODO(@zac-williamson) make this a Poseidon hash not a Pedersen hash - std::vector compression_buffer; - field_pt working_element(context); - size_t byte_counter = 0; - split(working_element, compression_buffer, field_pt(rolling_buffer), byte_counter, 32); - split(working_element, compression_buffer, field_pt(field_pt(i / 2)), byte_counter, 1); - if (byte_counter != 0) { - const uint256_t down_shift = uint256_t(1) << uint256_t((bytes_per_element - byte_counter) * 8); - working_element = working_element / barretenberg::fr(down_shift); - working_element = working_element.normalize(); - compression_buffer.push_back(working_element); - } - hash_output = stdlib::pedersen_plookup_commitment::compress(compression_buffer); + hash_output = stdlib::pedersen_plookup_commitment::compress( + std::vector{ (base_hash + field_pt(i / 2)).normalize() }, 0); } else { - 
rolling_buffer.write(byte_array(field_pt(i / 2), 1)); - hash_output = stdlib::blake3s(rolling_buffer); + hash_output = stdlib::pedersen_commitment::compress( + std::vector{ (base_hash + field_pt(i / 2)).normalize() }, 0); } - byte_array hi(field_pt(0), 16); - hi.write(hash_output.slice(0, 16)); - round_challenges.push_back(hi); - + auto hash_halves = slice_into_halves(hash_output); + round_challenges_new.push_back(hash_halves[1]); if (i + 1 < num_challenges) { - byte_array lo(field_pt(0), 16); - lo.write(hash_output.slice(16, 16)); - round_challenges.push_back(lo); + round_challenges_new.push_back(hash_halves[0]); } } - - current_challenge = round_challenges[round_challenges.size() - 1]; + current_challenge = round_challenges_new[round_challenges_new.size() - 1]; ++current_round; - challenge_keys.push_back(challenge_name); std::vector challenge_elements; - for (const auto& challenge : round_challenges) { - challenge_elements.push_back(static_cast(challenge)); + for (const auto& challenge : round_challenges_new) { + challenge_elements.push_back(challenge); } challenge_values.push_back(challenge_elements); } @@ -420,7 +365,7 @@ template class Transcript { private: transcript::Transcript transcript_base; - byte_array current_challenge; + field_pt current_challenge; mutable std::vector field_vector_keys; mutable std::vector> field_vector_values; diff --git a/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp b/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp index 57b8a39207c..915f63e5a32 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/verification_key/verification_key.hpp @@ -20,10 +20,163 @@ #include "../../commitment/pedersen/pedersen_plookup.hpp" #include "../../primitives/curves/bn254.hpp" +#include "barretenberg/crypto/pedersen_commitment/convert_buffer_to_field.hpp" + namespace proof_system::plonk { namespace stdlib { namespace 
recursion { +/** + * @brief Constructs a packed buffer of field elements to be fed into a Pedersen compress function + * Goal is to concatenate multiple inputs together into a single field element if the inputs are known to be + * small. Produces a vector of field elements where the maximum number of bits per element is `bits_per_element`. + * + * @details When calling `pedersen::compress` on the final buffer, we can skip the range checks normally performed in + * the compress method, because we know the sums of the scalar slices cannot exceed the field modulus. This requires + * `bits_per_element < modulus bits` + * @tparam Composer + * @tparam bits_per_element + */ +template struct PedersenPreimageBuilder { + using field_pt = field_t; + using witness_pt = witness_t; + + Composer* context; + + PedersenPreimageBuilder(Composer* ctx = nullptr) + : context(ctx){}; + + field_pt compress(const size_t hash_index) + { + // we can only use relaxed range checks in pedersen::compress iff bits_per_element < modulus bits + static_assert(bits_per_element < uint256_t(barretenberg::fr::modulus).get_msb()); + + if (current_bit_counter != 0) { + const uint256_t down_shift = uint256_t(1) << uint256_t((bits_per_element - current_bit_counter)); + for (auto& x : work_element) { + x = x / barretenberg::fr(down_shift); + } + preimage_data.push_back(field_pt::accumulate(work_element)); + } + if constexpr (Composer::type == ComposerType::PLOOKUP) { + return pedersen_plookup_commitment::compress_with_relaxed_range_constraints(preimage_data, + hash_index); + } else { + return pedersen_commitment::compress(preimage_data, hash_index); + } + } + + /** + * @brief preimage_data is a bit-array where `bits_per_element` number of bits are packed into a single field + * element + */ + std::vector preimage_data; + + /** + * @brief work_element represents the leading element to be added into `preimage_data`. 
+ * Vector is composed of field elements that represent bit chunks of a known length, + * such that the sum of the bit chunks < bits_per_element + */ + std::vector work_element; + + size_t current_bit_counter = 0; + + void add_element(const field_pt& element) { slice_element(element, 256); } + + void add_element_with_existing_range_constraint(const field_pt& element, const size_t num_bits) + { + slice_element(element, num_bits); + } + + /** + * @brief Populate `preimage_data` with element whose size is known to be `num_bits`. + * `preimage_data` is treated as a bit-array where `bits_per_element` number of bits are packed into a single field + * element. `slice_element` will: + * + * 1. determine how many bits are remaining in work_element + * 2. if remaining bits < num_bits, slice `element` into 2 chunks hi/lo + * 3. fill work_element with `hi` chunk (or the full element if possible) + * 4. (if work_element is full) combine work_element chunks into a field element and push onto `preimage_data` + * 5. 
(if required) create a new work_element and populate with `lo` + * + * @param element + * @param num_bits + */ + void slice_element(const field_pt& element, const size_t num_bits) + { + ASSERT(context != nullptr); + uint256_t element_u256(element.get_value()); + size_t hi_bits = bits_per_element - current_bit_counter; + if (hi_bits >= num_bits) { + // hmm + size_t new_bit_counter = current_bit_counter + num_bits; + field_pt hi = element; + const size_t leftovers = bits_per_element - new_bit_counter; + field_pt buffer_shift = field_pt(context, barretenberg::fr(uint256_t(1) << ((uint64_t)leftovers))); + work_element.emplace_back(hi * buffer_shift); + current_bit_counter = new_bit_counter; + if (current_bit_counter == bits_per_element) { + current_bit_counter = 0; + preimage_data.push_back(field_pt::accumulate(work_element)); + + work_element = std::vector(); + } + return; + } + const size_t lo_bits = num_bits - hi_bits; + field_pt lo = witness_t(context, barretenberg::fr(element_u256.slice(0, lo_bits))); + field_pt hi = witness_t(context, barretenberg::fr(element_u256.slice(lo_bits, 256))); + lo.create_range_constraint(lo_bits); + hi.create_range_constraint(hi_bits); + field_pt shift(context, barretenberg::fr(uint256_t(1ULL) << (uint64_t)lo_bits)); + if (!element.is_constant() || !lo.is_constant() || !hi.is_constant()) { + lo.add_two(hi * shift, -element).assert_equal(0); + } + + constexpr uint256_t modulus = barretenberg::fr::modulus; + constexpr size_t modulus_bits = modulus.get_msb(); + + // If our input is a full field element we must validate the sum of our slices is < p + if (num_bits >= modulus_bits) { + const field_pt r_lo = field_pt(context, modulus.slice(0, lo_bits)); + const field_pt r_hi = field_pt(context, modulus.slice(lo_bits, num_bits)); + + bool need_borrow = (uint256_t(lo.get_value()) > uint256_t(r_lo.get_value())); + field_pt borrow = field_pt::from_witness(context, need_borrow); + + // directly call `create_new_range_constraint` to avoid creating 
an arithmetic gate + if constexpr (Composer::type == ComposerType::PLOOKUP) { + context->create_new_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } else { + context->create_range_constraint(borrow.get_witness_index(), 1, "borrow"); + } + // Hi range check = r_hi - y_hi - borrow + // Lo range check = r_lo - y_lo + borrow * 2^{126} + field_t res_hi = (r_hi - hi) - borrow; + field_t res_lo = (r_lo - lo) + (borrow * (uint256_t(1) << lo_bits)); + + res_hi.create_range_constraint(modulus_bits + 1 - lo_bits); + res_lo.create_range_constraint(lo_bits); + } + current_bit_counter = (current_bit_counter + num_bits) % bits_per_element; + + // if current_bit_counter == 0 we've rolled over + if (current_bit_counter == 0) { + work_element.emplace_back(hi); + preimage_data.push_back(field_pt::accumulate(work_element)); + preimage_data.push_back(lo); + work_element = std::vector(); + } else { + work_element.emplace_back(hi); + preimage_data.push_back(field_pt::accumulate(work_element)); + field_t lo_shift(context, + barretenberg::fr(uint256_t(1ULL) << ((bits_per_element - (uint64_t)current_bit_counter)))); + work_element = std::vector(); + work_element.emplace_back(lo * lo_shift); + } + }; +}; + template struct evaluation_domain { static evaluation_domain from_witness(Composer* ctx, const barretenberg::evaluation_domain& input) { @@ -51,44 +204,6 @@ template struct evaluation_domain { return domain; } - field_t compress() const - { - if constexpr (Composer::type == ComposerType::PLOOKUP) { - field_t out = pedersen_plookup_commitment::compress({ - root, - domain, - generator, - }); - return out; - } else { - field_t out = pedersen_commitment::compress({ - root, - domain, - generator, - }); - return out; - } - } - - static barretenberg::fr compress_native(const barretenberg::evaluation_domain& input) - { - barretenberg::fr out; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - out = crypto::pedersen_commitment::lookup::compress_native({ - input.root, - 
input.domain, - input.generator, - }); - } else { - out = crypto::pedersen_commitment::compress_native({ - input.root, - input.domain, - input.generator, - }); - } - return out; - } - field_t root; field_t root_inverse; field_t domain; @@ -120,9 +235,15 @@ template struct verification_key { key->num_public_inputs = witness_t(ctx, input_key->num_public_inputs); key->domain = evaluation_domain::from_witness(ctx, input_key->domain); key->contains_recursive_proof = witness_t(ctx, input_key->contains_recursive_proof); - for (const auto& [tag, value] : input_key->commitments) { - key->commitments.insert({ tag, Curve::g1_ct::from_witness(ctx, value) }); + // We do not perform on_curve() circuit checks when constructing the Curve::g1_ct element. + // The assumption is that the circuit creator is honest and that the verification key hash (or some other + // method) will be used to ensure the provided key matches the key produced by the circuit creator. + // If the circuit creator is not honest, the entire set of circuit constraints being proved over cannot be + // trusted! 
+ const typename Curve::fq_ct x = Curve::fq_ct::from_witness(ctx, value.x); + const typename Curve::fq_ct y = Curve::fq_ct::from_witness(ctx, value.y); + key->commitments.insert({ tag, typename Curve::g1_ct(x, y) }); } return key; @@ -189,71 +310,65 @@ template struct verification_key { public: field_t compress(size_t const hash_index = 0) { - field_t compressed_domain = domain.compress(); - - std::vector> preimage_data; - preimage_data.push_back(Composer::type); - preimage_data.push_back(compressed_domain); - preimage_data.push_back(num_public_inputs); + PedersenPreimageBuilder preimage_buffer(context); + + field_t composer_type = witness_t::create_constant_witness(context, Composer::type); + domain.generator.create_range_constraint(16, "domain.generator"); + domain.domain.create_range_constraint(32, "domain.generator"); + num_public_inputs.create_range_constraint(32, "num_public_inputs"); + preimage_buffer.add_element_with_existing_range_constraint(composer_type, 8); + preimage_buffer.add_element_with_existing_range_constraint(domain.generator, 16); // coset generator is small + preimage_buffer.add_element_with_existing_range_constraint(domain.domain, 32); + preimage_buffer.add_element_with_existing_range_constraint(num_public_inputs, 32); + constexpr size_t limb_bits = Curve::fq_ct::NUM_LIMB_BITS; + constexpr size_t last_limb_bits = 256 - (limb_bits * 3); for (const auto& [tag, selector] : commitments) { - preimage_data.push_back(selector.x.binary_basis_limbs[0].element); - preimage_data.push_back(selector.x.binary_basis_limbs[1].element); - preimage_data.push_back(selector.x.binary_basis_limbs[2].element); - preimage_data.push_back(selector.x.binary_basis_limbs[3].element); - preimage_data.push_back(selector.y.binary_basis_limbs[0].element); - preimage_data.push_back(selector.y.binary_basis_limbs[1].element); - preimage_data.push_back(selector.y.binary_basis_limbs[2].element); - preimage_data.push_back(selector.y.binary_basis_limbs[3].element); - } - - field_t 
compressed_key; - if constexpr (Composer::type == ComposerType::PLOOKUP) { - compressed_key = pedersen_plookup_commitment::compress(preimage_data, hash_index); - } else { - compressed_key = pedersen_commitment::compress(preimage_data, hash_index); + const auto& x = selector.x; + const auto& y = selector.y; + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[3].element, last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[2].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[1].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(y.binary_basis_limbs[0].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[3].element, last_limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[2].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[1].element, limb_bits); + preimage_buffer.add_element_with_existing_range_constraint(x.binary_basis_limbs[0].element, limb_bits); } + preimage_buffer.add_element(domain.root); + field_t compressed_key = preimage_buffer.compress(hash_index); return compressed_key; } - static barretenberg::fr compress_native(const std::shared_ptr& key, - const size_t hash_index = 0) + static barretenberg::fr compress_native(const std::shared_ptr& key, const size_t = 0) { - barretenberg::fr compressed_domain = evaluation_domain::compress_native(key->domain); - - constexpr size_t num_limb_bits = bn254::fq_ct::NUM_LIMB_BITS; - const auto split_bigfield_limbs = [](const uint256_t& element) { - std::vector limbs; - limbs.push_back(element.slice(0, num_limb_bits)); - limbs.push_back(element.slice(num_limb_bits, num_limb_bits * 2)); - limbs.push_back(element.slice(num_limb_bits * 2, num_limb_bits * 3)); - limbs.push_back(element.slice(num_limb_bits * 3, num_limb_bits * 4)); 
- return limbs; - }; - - std::vector preimage_data; - preimage_data.push_back(Composer::type); - preimage_data.push_back(compressed_domain); - preimage_data.push_back(key->num_public_inputs); + std::vector preimage_data; + + preimage_data.push_back(static_cast(Composer::type)); + + const uint256_t domain = key->domain.domain; + const uint256_t generator = key->domain.generator; + const uint256_t num_public_inputs = key->num_public_inputs; + + ASSERT(domain < (uint256_t(1) << 32)); + ASSERT(generator < (uint256_t(1) << 16)); + ASSERT(num_public_inputs < (uint256_t(1) << 32)); + + write(preimage_data, static_cast(uint256_t(key->domain.generator))); + write(preimage_data, static_cast(uint256_t(key->domain.domain))); + write(preimage_data, static_cast(key->num_public_inputs)); for (const auto& [tag, selector] : key->commitments) { - const auto x_limbs = split_bigfield_limbs(selector.x); - const auto y_limbs = split_bigfield_limbs(selector.y); - - preimage_data.push_back(x_limbs[0]); - preimage_data.push_back(x_limbs[1]); - preimage_data.push_back(x_limbs[2]); - preimage_data.push_back(x_limbs[3]); - - preimage_data.push_back(y_limbs[0]); - preimage_data.push_back(y_limbs[1]); - preimage_data.push_back(y_limbs[2]); - preimage_data.push_back(y_limbs[3]); + write(preimage_data, selector.y); + write(preimage_data, selector.x); } + write(preimage_data, key->domain.root); + barretenberg::fr compressed_key; if constexpr (Composer::type == ComposerType::PLOOKUP) { - compressed_key = crypto::pedersen_commitment::lookup::compress_native(preimage_data, hash_index); + compressed_key = + from_buffer(crypto::pedersen_commitment::lookup::compress_native(preimage_data)); } else { - compressed_key = crypto::pedersen_commitment::compress_native(preimage_data, hash_index); + compressed_key = crypto::pedersen_commitment::compress_native(preimage_data); } return compressed_key; } diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp 
b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp index aa185e34d7c..009e732b209 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.hpp @@ -37,7 +37,6 @@ void populate_kate_element_map(typename Curve::Composer* ctx, typename Curve::fr_ct& batch_opening_scalar) { using fr_ct = typename Curve::fr_ct; - using g1_ct = typename Curve::g1_ct; const auto& polynomial_manifest = key->polynomial_manifest; for (size_t i = 0; i < key->polynomial_manifest.size(); ++i) { const auto& item = polynomial_manifest[i]; @@ -45,14 +44,14 @@ void populate_kate_element_map(typename Curve::Composer* ctx, const std::string poly_label(item.polynomial_label); switch (item.source) { case PolynomialSource::WITNESS: { - const auto element = transcript.get_group_element(label); - ASSERT(element.on_curve()); - if (element.is_point_at_infinity()) { + // get_circuit_group_element validates that the point produced lies on the curve + const auto element = transcript.get_circuit_group_element(label); + ASSERT(element.get_value().on_curve()); + if (element.get_value().is_point_at_infinity()) { std::cerr << label << " witness is point at infinity! Error!" 
<< std::endl; ctx->failure("witness " + label + " is point at infinity"); } - // g1_ct::from_witness validates that the point produced lies on the curve - kate_g1_elements.insert({ label, g1_ct::from_witness(ctx, element) }); + kate_g1_elements.insert({ label, element }); break; } case PolynomialSource::SELECTOR: @@ -89,15 +88,15 @@ void populate_kate_element_map(typename Curve::Composer* ctx, fr_ct z_power = 1; for (size_t i = 0; i < program_settings::program_width; ++i) { std::string quotient_label = "T_" + std::to_string(i + 1); - const auto element = transcript.get_group_element(quotient_label); + const auto element = transcript.get_circuit_group_element(quotient_label); - kate_g1_elements.insert({ quotient_label, g1_ct::from_witness(ctx, element) }); + kate_g1_elements.insert({ quotient_label, element }); kate_fr_elements_at_zeta_large.insert({ quotient_label, quotient_nu * z_power }); z_power *= key->z_pow_n; } - const auto PI_Z = transcript.get_group_element("PI_Z"); - const auto PI_Z_OMEGA = transcript.get_group_element("PI_Z_OMEGA"); + const auto PI_Z = transcript.get_circuit_group_element("PI_Z"); + const auto PI_Z_OMEGA = transcript.get_circuit_group_element("PI_Z_OMEGA"); fr_ct u = transcript.get_challenge_field_element("separator", 0); @@ -105,10 +104,10 @@ void populate_kate_element_map(typename Curve::Composer* ctx, proof_system::plonk::compute_kate_batch_evaluation(key, transcript); batch_opening_scalar = -batch_evaluation; - kate_g1_elements.insert({ "PI_Z_OMEGA", g1_ct::from_witness(ctx, PI_Z_OMEGA) }); + kate_g1_elements.insert({ "PI_Z_OMEGA", PI_Z_OMEGA }); kate_fr_elements_at_zeta_large.insert({ "PI_Z_OMEGA", zeta * key->domain.root * u }); - kate_g1_elements.insert({ "PI_Z", g1_ct::from_witness(ctx, PI_Z) }); + kate_g1_elements.insert({ "PI_Z", PI_Z }); kate_fr_elements_at_zeta.insert({ "PI_Z", zeta }); } @@ -287,18 +286,6 @@ aggregation_state verify_proof(typename Curve::Composer* context, for (const auto& [label, fr_value] : 
kate_fr_elements_at_zeta_omega) { const auto& g1_value = kate_g1_elements[label]; - // if (fr_value.get_value() == 0 && fr_value.witness_index != IS_CONSTANT ) - // { - // std::cerr << "bad scalar zero at " << label << std::endl; - // } - // if (fr_value.get_value() == 0 && fr_value.witness_index == IS_CONSTANT) { - // std::cerr << "scalar zero at " << label << std::endl; - // continue; - // } - - // if (fr_value.get_value() == 0 && fr_value.witness_index == IS_CONSTANT) { - // continue; - // } double_opening_scalars.emplace_back(fr_value); double_opening_elements.emplace_back(g1_value); } @@ -320,8 +307,7 @@ aggregation_state verify_proof(typename Curve::Composer* context, opening_elements.push_back(previous_output.P0); opening_scalars.push_back(random_separator); - rhs_elements.push_back( - (-(previous_output.P1)).reduce()); // TODO: use .normalize() instead? (As per defi bridge project) + rhs_elements.push_back((-(previous_output.P1))); rhs_scalars.push_back(random_separator); } @@ -344,6 +330,10 @@ aggregation_state verify_proof(typename Curve::Composer* context, const fr_ct l1 = public_inputs[idx1]; const fr_ct l2 = public_inputs[idx2]; const fr_ct l3 = public_inputs[idx3]; + l0.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l0"); + l1.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l1"); + l2.create_range_constraint(fq_ct::NUM_LIMB_BITS, "l2"); + l3.create_range_constraint(fq_ct::NUM_LAST_LIMB_BITS, "l3"); return fq_ct(l0, l1, l2, l3, false); }; @@ -369,7 +359,7 @@ aggregation_state verify_proof(typename Curve::Composer* context, opening_elements.push_back(g1_ct(x0, y0)); opening_scalars.push_back(recursion_separator_challenge); - rhs_elements.push_back((-g1_ct(x1, y1)).normalize()); + rhs_elements.push_back((-g1_ct(x1, y1))); rhs_scalars.push_back(recursion_separator_challenge); } @@ -380,13 +370,13 @@ aggregation_state verify_proof(typename Curve::Composer* context, for (const auto& to_add : elements_to_add) { opening_result = opening_result + to_add; } - 
opening_result = opening_result.normalize(); g1_ct rhs = g1_ct::template wnaf_batch_mul<128>(rhs_elements, rhs_scalars); - rhs = rhs + PI_Z; - rhs = (-rhs).normalize(); - std::vector proof_witness_indices{ + rhs = (-rhs) - PI_Z; + + // TODO(zac: remove this once a3-packages has migrated to calling `assign_object_to_proof_outputs`) + std::vector proof_witness_indices = { opening_result.x.binary_basis_limbs[0].element.normalize().witness_index, opening_result.x.binary_basis_limbs[1].element.normalize().witness_index, opening_result.x.binary_basis_limbs[2].element.normalize().witness_index, @@ -404,10 +394,10 @@ aggregation_state verify_proof(typename Curve::Composer* context, rhs.y.binary_basis_limbs[2].element.normalize().witness_index, rhs.y.binary_basis_limbs[3].element.normalize().witness_index, }; - - return aggregation_state{ - opening_result, rhs, transcript.get_field_element_vector("public_inputs"), proof_witness_indices, true, + auto result = aggregation_state{ + opening_result, rhs, transcript.get_field_element_vector("public_inputs"), proof_witness_indices, true }; + return result; } } // namespace recursion diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp index 82e8042b02f..db3dfc2cc1e 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier.test.cpp @@ -212,6 +212,8 @@ template class stdlib_verifier : public testing::Test { stdlib::recursion::verify_proof( &outer_composer, verification_key_b, recursive_manifest, recursive_proof_b, previous_output); + verification_key_b->compress(); + verification_key->compress(); return { output, verification_key }; } @@ -301,7 +303,7 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + 
circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); @@ -346,7 +348,7 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); @@ -379,6 +381,9 @@ template class stdlib_verifier : public testing::Test { InnerComposer inner_composer_a = InnerComposer("../srs_db/ignition"); InnerComposer inner_composer_b = InnerComposer("../srs_db/ignition"); + OuterComposer mid_composer_a = OuterComposer("../srs_db/ignition"); + OuterComposer mid_composer_b = OuterComposer("../srs_db/ignition"); + OuterComposer outer_composer = OuterComposer("../srs_db/ignition"); std::vector inner_inputs{ barretenberg::fr::random_element(), @@ -388,7 +393,27 @@ template class stdlib_verifier : public testing::Test { create_inner_circuit(inner_composer_a, inner_inputs); create_inner_circuit(inner_composer_b, inner_inputs); - auto circuit_output = create_double_outer_circuit(inner_composer_a, inner_composer_b, outer_composer); + auto circuit_output_a = create_outer_circuit(inner_composer_a, mid_composer_a); + + uint256_t a0 = circuit_output_a.aggregation_state.P0.x.binary_basis_limbs[1].element.get_value(); + uint256_t a1 = circuit_output_a.aggregation_state.P0.y.binary_basis_limbs[1].element.get_value(); + uint256_t a2 = circuit_output_a.aggregation_state.P1.x.binary_basis_limbs[1].element.get_value(); + uint256_t a3 = circuit_output_a.aggregation_state.P1.y.binary_basis_limbs[1].element.get_value(); + + ASSERT(a0.get_msb() <= 68); + ASSERT(a1.get_msb() <= 68); + ASSERT(a2.get_msb() <= 68); + ASSERT(a3.get_msb() <= 68); + + circuit_output_a.aggregation_state.assign_object_to_proof_outputs(); + + auto circuit_output_b = create_outer_circuit(inner_composer_b, mid_composer_b); + + 
circuit_output_b.aggregation_state.assign_object_to_proof_outputs(); + + auto circuit_output = create_double_outer_circuit(mid_composer_a, mid_composer_b, outer_composer); + + circuit_output.aggregation_state.assign_object_to_proof_outputs(); g1::affine_element P[2]; P[0].x = barretenberg::fq(circuit_output.aggregation_state.P0.x.get_value().lo); @@ -398,8 +423,8 @@ template class stdlib_verifier : public testing::Test { barretenberg::fq12 inner_proof_result = barretenberg::pairing::reduced_ate_pairing_batch_precomputed( P, circuit_output.verification_key->reference_string->get_precomputed_g2_lines(), 2); - EXPECT_EQ(circuit_output.aggregation_state.public_inputs[0].get_value(), inner_inputs[0]); - EXPECT_EQ(circuit_output.aggregation_state.public_inputs[1].get_value(), inner_inputs[1]); + EXPECT_EQ(circuit_output_a.aggregation_state.public_inputs[0].get_value(), inner_inputs[0]); + EXPECT_EQ(circuit_output_a.aggregation_state.public_inputs[1].get_value(), inner_inputs[1]); EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); @@ -456,7 +481,6 @@ template class stdlib_verifier : public testing::Test { EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); printf("composer gates = %zu\n", outer_composer.get_num_gates()); - auto prover = outer_composer.create_prover(); auto verifier = outer_composer.create_verifier(); @@ -638,14 +662,23 @@ HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition) HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition_ultra_no_tables) { - TestFixture::test_recursive_proof_composition_ultra_no_tables(); + if constexpr (TypeParam::type == ComposerType::PLOOKUP) { + TestFixture::test_recursive_proof_composition_ultra_no_tables(); + } else { + // no point running this if we're not in UltraPlonk + GTEST_SKIP(); + } }; -// CircleCI can't cope with this. 
-// HEAVY_TYPED_TEST(stdlib_verifier, double_verification) -// { -// TestFixture::test_double_verification(); -// }; +HEAVY_TYPED_TEST(stdlib_verifier, double_verification) +{ + if constexpr (TypeParam::type == ComposerType::PLOOKUP) { + TestFixture::test_double_verification(); + } else { + // CircleCI can't cope with non-ultraplonk version. + GTEST_SKIP(); + } +}; HEAVY_TYPED_TEST(stdlib_verifier, recursive_proof_composition_with_variable_verification_key_a) { diff --git a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp index 1f4410148b4..36c128f3ea0 100644 --- a/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp +++ b/cpp/src/barretenberg/stdlib/recursion/verifier/verifier_turbo.test.cpp @@ -188,7 +188,7 @@ template class stdlib_verifier_turbo : public testing:: EXPECT_EQ(inner_proof_result, barretenberg::fq12::one()); - circuit_output.aggregation_state.add_proof_outputs_as_public_inputs(); + circuit_output.aggregation_state.assign_object_to_proof_outputs(); EXPECT_EQ(outer_composer.failed(), false); std::cout << "creating prover" << std::endl; diff --git a/cpp/src/barretenberg/transcript/transcript.cpp b/cpp/src/barretenberg/transcript/transcript.cpp index 9c448949c91..8f597c454b6 100644 --- a/cpp/src/barretenberg/transcript/transcript.cpp +++ b/cpp/src/barretenberg/transcript/transcript.cpp @@ -46,10 +46,12 @@ std::array Keccak256Hasher::hash(std std::array Blake3sHasher::hash(std::vector const& buffer) { - std::vector hash_result = blake3::blake3s(buffer); + grumpkin::fq input = grumpkin::fq::serialize_from_buffer(&buffer[0]); + grumpkin::fq compressed = crypto::pedersen_commitment::compress_native({ input }); + std::vector res = to_buffer(compressed); std::array result; for (size_t i = 0; i < PRNG_OUTPUT_SIZE; ++i) { - result[i] = hash_result[i]; + result[i] = res[i]; } return result; } @@ -59,10 +61,12 @@ std::array Blake3sHasher::hash_plookup 
// TODO(@zac-williamson) Change to call a Poseidon hash and create a PoseidonHasher // (not making the name change right now as it will break concurrent work w. getting recursion working in Noir) // We also need to implement a Poseidon gadget - std::vector compressed_buffer = crypto::pedersen_commitment::lookup::compress_native(buffer); + grumpkin::fq input = grumpkin::fq::serialize_from_buffer(&buffer[0]); + grumpkin::fq compressed = crypto::pedersen_commitment::lookup::compress_native({ input }); + std::vector res = to_buffer(compressed); std::array result; for (size_t i = 0; i < PRNG_OUTPUT_SIZE; ++i) { - result[i] = compressed_buffer[i]; + result[i] = res[i]; } return result; } @@ -259,7 +263,11 @@ void Transcript::apply_fiat_shamir(const std::string& challenge_name /*, const b } std::vector rolling_buffer(base_hash.begin(), base_hash.end()); - rolling_buffer.push_back(0); + if (hasher == HashType::Keccak256) { + rolling_buffer.push_back(0); + } else { + rolling_buffer[31] = (0); + } // Compute how many hashes we need so that we have enough distinct chunks of 'random' bytes to distribute // across the num_challenges. 
diff --git a/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol b/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol index 4b118cdef60..748d20fcb90 100644 --- a/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol +++ b/sol/src/ultra/keys/RecursiveUltraVerificationKey.sol @@ -1,72 +1,72 @@ -// Verification Key Hash: 507de35addf16b79526d713259492d5d1764fdb6ce55ff4ccb03c147b72f381a +// Verification Key Hash: b665bc769f274feb94ea7f9997fa684b414aa8b9b9bac0227c7ce2e1cbd3d115 // SPDX-License-Identifier: Apache-2.0 // Copyright 2022 Aztec pragma solidity >=0.8.4; library RecursiveUltraVerificationKey { function verificationKeyHash() internal pure returns (bytes32) { - return 0x507de35addf16b79526d713259492d5d1764fdb6ce55ff4ccb03c147b72f381a; + return 0xb665bc769f274feb94ea7f9997fa684b414aa8b9b9bac0227c7ce2e1cbd3d115; } function loadVerificationKey(uint256 _vk, uint256 _omegaInverseLoc) internal pure { assembly { - mstore(add(_vk, 0x00), 0x0000000000000000000000000000000000000000000000000000000000080000) // vk.circuit_size + mstore(add(_vk, 0x00), 0x0000000000000000000000000000000000000000000000000000000000040000) // vk.circuit_size mstore(add(_vk, 0x20), 0x0000000000000000000000000000000000000000000000000000000000000010) // vk.num_inputs - mstore(add(_vk, 0x40), 0x2260e724844bca5251829353968e4915305258418357473a5c1d597f613f6cbd) // vk.work_root - mstore(add(_vk, 0x60), 0x3064486657634403844b0eac78ca882cfd284341fcb0615a15cfcd17b14d8201) // vk.domain_inverse - mstore(add(_vk, 0x80), 0x18fe72968b540c1dad6c7648fcb3407edfc489d8dcf3fdce314c1f0e72684c43) // vk.Q1.x - mstore(add(_vk, 0xa0), 0x16f49263ee016852edfed2e84bf44c22b31064b9034b62059329b2af2f349c37) // vk.Q1.y - mstore(add(_vk, 0xc0), 0x1c382676d0f8e5691def3a60d533850f573c36aa200ab364c091acc4a7eb094f) // vk.Q2.x - mstore(add(_vk, 0xe0), 0x17c05ca7ea679681a3cf772fabcf2c1a988e39910f1ba8de3d1f68ffb0effda1) // vk.Q2.y - mstore(add(_vk, 0x100), 0x257d75dead2d8cbb2f63b3592a762a2c2dbe0195a533736fd01982370e768676) // vk.Q3.x 
- mstore(add(_vk, 0x120), 0x258b6d74446f5e532bce6e1a62372a82986eac9801c13a8553f373c30398a47c) // vk.Q3.y - mstore(add(_vk, 0x140), 0x290ff6a808f6abe7508a8c884ea0fc2f819e23a5b6d7c2dd1105da2a3f0637e0) // vk.Q4.x - mstore(add(_vk, 0x160), 0x2e6c3c419be44ed56b61069a06e980360f58830ad52b38bb69de92c456ebf0ca) // vk.Q4.y - mstore(add(_vk, 0x180), 0x282e6e14bbedfc7ef013feb4877ce9098389abfd3ad8899c957be4fdb20d0454) // vk.Q_M.x - mstore(add(_vk, 0x1a0), 0x2483d06975c3965d3f2d205ddeff196b90ca5883878bffc0bd190a357fee947e) // vk.Q_M.y - mstore(add(_vk, 0x1c0), 0x09af8fed71838d47b0052d8e3fdda11f55c62a6f2cb9aab24edd90b5e9640e9c) // vk.Q_C.x - mstore(add(_vk, 0x1e0), 0x2bdf7549fa146188dd750d032d9dec911c5799ca99f72405c4ac49f3f9e3a51a) // vk.Q_C.y - mstore(add(_vk, 0x200), 0x1479a535c87c413301d82c5ae1598b46c03117a57b878416d1143bb48f1df8bf) // vk.Q_ARITHMETIC.x - mstore(add(_vk, 0x220), 0x03203e3c02cc68282d93507d0ad9d56304d5a4b2908233bcb6f8682f8b264532) // vk.Q_ARITHMETIC.y - mstore(add(_vk, 0x240), 0x0cccd1de3f4ef2a2bfffbb7a91f8be2c49e9dc9b565ba4312015a88558f40d20) // vk.QSORT.x - mstore(add(_vk, 0x260), 0x092c5bd4edb996d6c1189a2682f6e93ede4b9aff7f07823605c894f833316718) // vk.QSORT.y - mstore(add(_vk, 0x280), 0x20089848d81ee4e8d7700679e7b5ed017916e2ee28bf76c0e0f4862274637bb8) // vk.Q_ELLIPTIC.x - mstore(add(_vk, 0x2a0), 0x0faae100924d24a70708e49a08ba2ba9df261088bf04e7b4c3f811cc0d8995fe) // vk.Q_ELLIPTIC.y - mstore(add(_vk, 0x2c0), 0x2de71f46452329536fe14dfff808692c405b9ef1ae47c451be8383ded868af5c) // vk.Q_AUX.x - mstore(add(_vk, 0x2e0), 0x0a520e2f877f19cc69aad2396bf741e6864a9f0b657887e80165b794f7612e71) // vk.Q_AUX.y - mstore(add(_vk, 0x300), 0x2779b1b7b8433eeee7333a1372feb4587da74e2c93cc54917e201748ed847204) // vk.SIGMA1.x - mstore(add(_vk, 0x320), 0x2198823f66ad59612f6cb77aff9437388abdbcc4d8f6eac792d8bca7d1b341d9) // vk.SIGMA1.y - mstore(add(_vk, 0x340), 0x1f6732b9d128931b2e32b2cae73b029720cca3cef23fee25363d520ed0ba3f92) // vk.SIGMA2.x - mstore(add(_vk, 0x360), 
0x15fb336844e68b08361c10b83e7d6ea0f011958774e58e5f7c43e6606e989ecc) // vk.SIGMA2.y - mstore(add(_vk, 0x380), 0x0984b1b6c723afb4713656abf30b06e2ad04c054dd3acf016a6db1ee7111ca11) // vk.SIGMA3.x - mstore(add(_vk, 0x3a0), 0x03421d01f19c6b91e477648819f57d888b3b23b67599266293bddf91a2636ff1) // vk.SIGMA3.y - mstore(add(_vk, 0x3c0), 0x2f77cda90d366b151b17c5667f10526ab0fe144aecb307e00ede6039365bcfa0) // vk.SIGMA4.x - mstore(add(_vk, 0x3e0), 0x0d1e8f758babcbbf134dfe341c262ee25d0254cba8f5487ad5bddd190f27a9e8) // vk.SIGMA4.y - mstore(add(_vk, 0x400), 0x2f61a890b9f1dff4ef5c8b0eafe9b71c7a23dc4c8a6791d9c01418310f4a7b2e) // vk.TABLE1.x - mstore(add(_vk, 0x420), 0x07c8a51d1881fcdfe1cb7dcefc48a44047c7f5386797d5f8553ce2e12e8daba0) // vk.TABLE1.y - mstore(add(_vk, 0x440), 0x1adf56913dea23b7b14c952933b0b40fc476dc2697a758ec9df73802b0596c2f) // vk.TABLE2.x - mstore(add(_vk, 0x460), 0x212a1759e19285a35a70a245cca6477f89b6f156e4425cf52cfccb4594f59152) // vk.TABLE2.y - mstore(add(_vk, 0x480), 0x1527f8c19085ac209ebddbccae4dd0ca58b078e56fd20d651ce3a3194697b191) // vk.TABLE3.x - mstore(add(_vk, 0x4a0), 0x02247dca9c3cb09318aa6100a2a7c628281c69bc41cfda34aa72c263b69344b4) // vk.TABLE3.y - mstore(add(_vk, 0x4c0), 0x12eea56d2ada3befa5db215ea5ebbd37b5ce95fcd1cf7adb94d5a1784876b4f7) // vk.TABLE4.x - mstore(add(_vk, 0x4e0), 0x190df1146fbdd5cc79e8817ebcd6311e35cf5cc38795cee26371a707d685e05a) // vk.TABLE4.y - mstore(add(_vk, 0x500), 0x019b3a1970f9f77b13538cd8071ea3ee7c556fd98009e2a04be044ead0a94623) // vk.TABLE_TYPE.x - mstore(add(_vk, 0x520), 0x159cbdae3e194fe45524a171befdcb98b55c8d495fc463c98ac690eee947119f) // vk.TABLE_TYPE.y - mstore(add(_vk, 0x540), 0x16b2f7fa29f578aae3d4c0b8220101570adfcc9e8aa8a148267208540de189f1) // vk.ID1.x - mstore(add(_vk, 0x560), 0x2344a211fbbacc281de980197e4f12155d90d55a67f4ad08398bac665f813953) // vk.ID1.y - mstore(add(_vk, 0x580), 0x1af709df675db1688b95927324e71c5e551436ba7cb32478570a9cfaebf90614) // vk.ID2.x - mstore(add(_vk, 0x5a0), 
0x2b83e76f61aa5cd70218c38e693ae0a99e9a2f4a192af5c77dbd27fa605fdae4) // vk.ID2.y - mstore(add(_vk, 0x5c0), 0x038c89635a8b6ec9766d5f98d13c16f8c312088f830610de72c00edf8c3b7800) // vk.ID3.x - mstore(add(_vk, 0x5e0), 0x1863d9217ba6c6764fa02298efe25fabfbe454a27431b970a6afff5d1986fadb) // vk.ID3.y - mstore(add(_vk, 0x600), 0x259a5dd47d44d6240407c26718201a122fb4b6b38d838f6e24d1c75515016761) // vk.ID4.x - mstore(add(_vk, 0x620), 0x14db344b735ffe084107e5cea07b00e4c41a82f0073f76e0536cd7118d78866f) // vk.ID4.y + mstore(add(_vk, 0x40), 0x19ddbcaf3a8d46c15c0176fbb5b95e4dc57088ff13f4d1bd84c6bfa57dcdc0e0) // vk.work_root + mstore(add(_vk, 0x60), 0x30644259cd94e7dd5045d7a27013b7fcd21c9e3b7fa75222e7bda49b729b0401) // vk.domain_inverse + mstore(add(_vk, 0x80), 0x16f7fc6133c8fb2dab06c57392df697a53357ecd918d749d1c981dcd0ee6d849) // vk.Q1.x + mstore(add(_vk, 0xa0), 0x2ba047103f9f86b84058d718a082e2faa53e50109e7cb880d2cbb7a1bf98da89) // vk.Q1.y + mstore(add(_vk, 0xc0), 0x1b9d146737dbb7759e0cad93ad4a7669880a062aceb7b46b8485327976d7285c) // vk.Q2.x + mstore(add(_vk, 0xe0), 0x11de7c3d638acc90e7f844c08658d0588da864268e00576d26aaca3cf49af350) // vk.Q2.y + mstore(add(_vk, 0x100), 0x1466840d8ad2dfde3a55d4c98412a05807bbe8aac33c27ba100c1e621fbebba0) // vk.Q3.x + mstore(add(_vk, 0x120), 0x2198ce44955b8ac6e21ddcbb66acd9df7596ad9e5fcf22f2227e8bbb51fe44ee) // vk.Q3.y + mstore(add(_vk, 0x140), 0x18b96a49db3644e2986f811b8c104e8eb88aa5eb9aec0ca109322a64885688bd) // vk.Q4.x + mstore(add(_vk, 0x160), 0x2ffec963826849cabd279a2b9f9a26f81518eb65d882f47a32470fc52f53def0) // vk.Q4.y + mstore(add(_vk, 0x180), 0x09dd725897471fddc177b241d7abc402705acfa452707388fa62666ad454598c) // vk.Q_M.x + mstore(add(_vk, 0x1a0), 0x03a46eb7ed69136e109e2761fb707da7cee18b3d05e581f24d77853b3b03581e) // vk.Q_M.y + mstore(add(_vk, 0x1c0), 0x304db51670cb2c59e3088431803e82bce8c81b38eefa267871ae2103ca7842ca) // vk.Q_C.x + mstore(add(_vk, 0x1e0), 0x1d7ec7d8d4a74e337de26b7adaecb8beb03d8cd647aa180bc08de840038710d5) // vk.Q_C.y + 
mstore(add(_vk, 0x200), 0x1db65122bf0f0a58fe07bd7342d3e26b07923041cb7d2158d13fb7b5328da40e) // vk.Q_ARITHMETIC.x + mstore(add(_vk, 0x220), 0x1691db1eeedbcb4f7646959cf363c00b7e26812a225edf5a6972d815270770f5) // vk.Q_ARITHMETIC.y + mstore(add(_vk, 0x240), 0x2a63b6a306e30d87f4b8597cbd1dcecff5fc7cacb774247fca6531e3d347ada4) // vk.QSORT.x + mstore(add(_vk, 0x260), 0x2849d2901fcd1f048924fb77e9451ad45d80f9f842418146b1fde0a7c752fc5f) // vk.QSORT.y + mstore(add(_vk, 0x280), 0x0e42866979ddac27ac729352dd0f844da4fb5a1c3e2480b5b940acd12304c700) // vk.Q_ELLIPTIC.x + mstore(add(_vk, 0x2a0), 0x017ac9a40547e866bdb914dc2b73661c0ec8aa67956c8c9bf406795f75e15c53) // vk.Q_ELLIPTIC.y + mstore(add(_vk, 0x2c0), 0x1ad08199bf79952adff0aa3a9c04a26f18ad7deed1fbed0548f2c83ddf913ef9) // vk.Q_AUX.x + mstore(add(_vk, 0x2e0), 0x151df9277b110c615c058f7f783105d03cab938f23884afed1897d0049715d21) // vk.Q_AUX.y + mstore(add(_vk, 0x300), 0x0bd26d62138b721fdc08fd7d52cd3dfaa37399eb416af0ec6237f9ec1a63a5c0) // vk.SIGMA1.x + mstore(add(_vk, 0x320), 0x103282cd2ef4210ac390d70a1cba58c6792a5d872ae0337615f8ac9997d300ef) // vk.SIGMA1.y + mstore(add(_vk, 0x340), 0x08abaa91c69ffa73d80d9a9562020c2a104771f07cf4099cbbe9a0071befb1cc) // vk.SIGMA2.x + mstore(add(_vk, 0x360), 0x1a82e5cd4a2c3de77afb2ca76c89b54991a4db3939a5c24806af01a0f69a2366) // vk.SIGMA2.y + mstore(add(_vk, 0x380), 0x26d50e2d19c429d1a2987d5249b88e388f93339fc05f52939fa2e1f4be653918) // vk.SIGMA3.x + mstore(add(_vk, 0x3a0), 0x0a49cd57e79633ea43cc3172e819327ce260682d8b571d0964678a153c17e959) // vk.SIGMA3.y + mstore(add(_vk, 0x3c0), 0x1c82f3e7c57b08ef90fda6fe39427b815a835c8559b64eac0a4b213998f6802c) // vk.SIGMA4.x + mstore(add(_vk, 0x3e0), 0x098bad014a270b6f5e4c90cbd299c15c5fd190457f0e78a5f849243e86688868) // vk.SIGMA4.y + mstore(add(_vk, 0x400), 0x215a055ec0bf7d7ab5e005b4260258aaadfd8ae9005a09060fdd0cee02dc3fea) // vk.TABLE1.x + mstore(add(_vk, 0x420), 0x1841eba177a34b1eb908727fe2e54bf33fc82b6e58dfd044acd4ba05ca80c837) // vk.TABLE1.y + mstore(add(_vk, 
0x440), 0x018eb037682044ebf9cad76f777bf379b94c4d31d4351ce9677ff146a744555c) // vk.TABLE2.x + mstore(add(_vk, 0x460), 0x2bf87d72f0aef257c728503c900516f9274ab06eb54804651218438e40f06c25) // vk.TABLE2.y + mstore(add(_vk, 0x480), 0x13b003b384fb50e00994bf62a0057f44344be47383d59a7e9f1319d710ab5263) // vk.TABLE3.x + mstore(add(_vk, 0x4a0), 0x1a5f338a3d05fb46ea46855e6c36dbdb23c5f20a56acc795324fe2958189ec39) // vk.TABLE3.y + mstore(add(_vk, 0x4c0), 0x1365fd683dbad2c4c55b02dd33c4b96fde00e5bb3f52be20ead95484e130aee1) // vk.TABLE4.x + mstore(add(_vk, 0x4e0), 0x2da2ba1d27548e452cc863758acf156eb268f577b7d08ba58e7bbf2d28f6f23c) // vk.TABLE4.y + mstore(add(_vk, 0x500), 0x0ef908712f03ce2e4db3ef557abbde7c584d8c831165ba40ab43124526c53cc1) // vk.TABLE_TYPE.x + mstore(add(_vk, 0x520), 0x009dd642bc5eb1869048b59d2052645208cc5a14537814568d9c985c93319e55) // vk.TABLE_TYPE.y + mstore(add(_vk, 0x540), 0x0f973c9af1150675ae6dac1ea8ea366e5b8db13bb9c2237ab11c40dfb644ebf5) // vk.ID1.x + mstore(add(_vk, 0x560), 0x06b0c966f9edab490ac15a176d35d56996cc66854268197989a53ab0d1368188) // vk.ID1.y + mstore(add(_vk, 0x580), 0x09e719130bb46416efa070d08d82cc07fe0ed3bd8685616b92b4b9619e0807b2) // vk.ID2.x + mstore(add(_vk, 0x5a0), 0x18f35ee01438dda2443da27299404d09ccfff098a0ceac2e9a10bf2a96bc11ac) // vk.ID2.y + mstore(add(_vk, 0x5c0), 0x0cb835c737d324b9ff5bba45988dc4921104803b7e37649f8c628f0de26361ac) // vk.ID3.x + mstore(add(_vk, 0x5e0), 0x18ca0ac87859387aa32c6939f7a4a0d322879a3fdb1ef85d06addcddc13acea5) // vk.ID3.y + mstore(add(_vk, 0x600), 0x0047304b09efd9315a96d9e802c9a50c1964076026e5f17aff825d6cfc38d823) // vk.ID4.x + mstore(add(_vk, 0x620), 0x21c9f3aa4cbe8ee21422052f7c22d3d8a5a9a89c262a5a5cb52d8802f6106c49) // vk.ID4.y mstore(add(_vk, 0x640), 0x01) // vk.contains_recursive_proof mstore(add(_vk, 0x660), 0) // vk.recursive_proof_public_input_indices mstore(add(_vk, 0x680), 0x260e01b251f6f1c7e7ff4e580791dee8ea51d87a358e038b4efe30fac09383c1) // vk.g2_x.X.c1 mstore(add(_vk, 0x6a0), 
0x0118c4d5b837bcc2bc89b5b398b5974e9f5944073b32078b7e231fec938883b0) // vk.g2_x.X.c0 mstore(add(_vk, 0x6c0), 0x04fc6369f7110fe3d25156c1bb9a72859cf2a04641f99ba4ee413c80da6a5fe4) // vk.g2_x.Y.c1 mstore(add(_vk, 0x6e0), 0x22febda3c0c0632a56475b4214e5615e11e6dd3f96e6cea2854a87d4dacc5e55) // vk.g2_x.Y.c0 - mstore(_omegaInverseLoc, 0x06e402c0a314fb67a15cf806664ae1b722dbc0efe66e6c81d98f9924ca535321) // vk.work_root_inverse + mstore(_omegaInverseLoc, 0x036853f083780e87f8d7c71d111119c57dbe118c22d5ad707a82317466c5174c) // vk.work_root_inverse } } }