From acc6b95d5d5573cad558552ec29dccd769fa9a51 Mon Sep 17 00:00:00 2001 From: Zachary James Williamson Date: Tue, 25 Apr 2023 00:05:15 +0200 Subject: [PATCH] Zw/recursion constraint reduction (#377) * removed blake3s hash from ultraplonk recursive prover * UltraComposer will now not create duplicate non-native field multiplication constraints * Propagate new stuff to Honk and splitting_tmp. * Clean up and add comments. --------- Co-authored-by: codygunton --- .../honk/composer/ultra_honk_composer.hpp | 5 +- .../composer/ultra_honk_composer.test.cpp | 4 +- .../splitting_tmp/ultra_plonk_composer.hpp | 39 ++-- .../ultra_plonk_composer.test.cpp | 2 +- .../plonk/composer/ultra_composer.cpp | 206 +++++++++++------- .../plonk/composer/ultra_composer.hpp | 105 +++++++-- .../plonk/composer/ultra_composer.test.cpp | 2 +- .../ultra_circuit_constructor.cpp | 205 ++++++++++------- .../ultra_circuit_constructor.hpp | 97 ++++++++- .../primitives/bigfield/bigfield_impl.hpp | 6 +- .../recursion/transcript/transcript.hpp | 46 +++- .../barretenberg/transcript/transcript.cpp | 17 +- .../barretenberg/transcript/transcript.hpp | 1 + 13 files changed, 520 insertions(+), 215 deletions(-) diff --git a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp index 5fa40b4626..f2eb030039 100644 --- a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp +++ b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp @@ -357,11 +357,10 @@ class UltraHonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array evaluate_non_native_field_multiplication( + std::array queue_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true) { - return circuit_constructor.evaluate_non_native_field_multiplication(input, - range_constrain_quotient_and_remainder); + return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); }; // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple nnf_copy(cached_non_native_field_multiplications); + // // update nnfcount + // std::sort(nnf_copy.begin(), nnf_copy.end()); + + // auto last = std::unique(nnf_copy.begin(), nnf_copy.end()); + // const size_t num_nnf_ops = static_cast(std::distance(nnf_copy.begin(), last)); + // nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; // } + // // /** // * @brief Get the final number of gates in a circuit, which consists of the sum of: // * 1) Current number number of actual gates // * 2) Number of public inputs, as we'll need to add a gate for each of them // * 3) Number of Rom array-associated gates - // * 4) NUmber of range-list associated gates + // * 4) Number of range-list associated gates + // * 5) Number of non-native field multiplication gates. // * // * @return size_t // */ + // // virtual size_t get_num_gates() const override // { // // if circuit finalised already added extra gates @@ -234,8 +244,9 @@ class UltraPlonkComposer { // size_t rangecount = 0; // size_t romcount = 0; // size_t ramcount = 0; - // get_num_gates_split_into_components(count, rangecount, romcount, ramcount); - // return count + romcount + ramcount + rangecount; + // size_t nnfcount = 0; + // get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount); + // return count + romcount + ramcount + rangecount + nnfcount; // } // virtual void print_num_gates() const override @@ -244,12 +255,13 @@ class UltraPlonkComposer { // size_t rangecount = 0; // size_t romcount = 0; // size_t ramcount = 0; - - // get_num_gates_split_into_components(count, rangecount, romcount, ramcount); + // size_t nnfcount = 0; + // get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount); // size_t total = count + romcount + ramcount + rangecount; // std::cout << "gates = " << total << " (arith " << count << ", rom " << romcount << ", ram " << ramcount - // << ", range " << rangecount << "), pubinp = " << public_inputs.size() << std::endl; + // << ", range " << rangecount << ", non native field gates " << nnfcount + // << "), pubinp = " << public_inputs.size() << std::endl; // } void assert_equal(const uint32_t a_variable_idx, @@ -367,11 +379,10 @@ class UltraPlonkComposer { }; // std::array decompose_non_native_field_double_width_limb( // const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array evaluate_non_native_field_multiplication( + std::array queue_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true) { - return circuit_constructor.evaluate_non_native_field_multiplication(input, - range_constrain_quotient_and_remainder); + return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder); }; // std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& // input); typedef std::pair scaled_witness; typedef std::tuple UltraComposer::compute_proving_key() * our circuit is finalised, and we must not to execute these functions again. */ if (!circuit_finalised) { + process_non_native_field_multiplications(); process_ROM_arrays(public_inputs.size()); process_RAM_arrays(public_inputs.size()); process_range_lists(); @@ -1846,18 +1847,22 @@ std::array UltraComposer::decompose_non_native_field_double_width_l } /** - * NON NATIVE FIELD MULTIPLICATION CUSTOM GATE SEQUENCE + * @brief Queue up non-native field multiplication data. * - * This method will evaluate the equation (a * b = q * p + r) - * Where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables + * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, + * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. + * + * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove + * duplicates in the circuit finishing stage of the proving key computation. * * The non-native field modulus, p, is a circuit constant * * The return value are the witness indices of the two remainder limbs `lo_1, hi_2` * - * N.B. this method does NOT evaluate the prime field component of non-native field multiplications + * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraComposer::evaluate_non_native_field_multiplication( +std::array UltraComposer::queue_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1889,8 +1894,6 @@ std::array UltraComposer::evaluate_non_native_field_multiplication( constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1939,82 +1942,127 @@ std::array UltraComposer::evaluate_non_native_field_multiplication( range_constrain_two_limbs(input.q[2], input.q[3]); } - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); + // Add witnesses into the multiplication cache + // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .q = input.q, + .r = input.r, + .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, + .neg_modulus = input.neg_modulus, + }; + cached_non_native_field_multiplications.emplace_back(cache_entry); - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; + return std::array{ lo_1_idx, hi_3_idx }; +} - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); +/** + * @brief Called in `compute_proving_key` when finalizing circuit. + * Iterates over the cached_non_native_field_multiplication objects, + * removes duplicates, and instantiates the remainder as constraints` + */ +void UltraComposer::process_non_native_field_multiplications() +{ + std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); + auto last = + std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - return std::array{ lo_1_idx, hi_3_idx }; + auto it = cached_non_native_field_multiplications.begin(); + + constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT_2 = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + + // iterate over the cached items and create constraints + while (it != last) { + const auto input = *it; + const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; + const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; + const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; + const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; + const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; + const uint32_t hi_3_idx = input.cross_terms.hi_3_idx; + + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); + ++it; + } } /** diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp index 1ce6813d40..6162066f83 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.hpp @@ -7,6 +7,14 @@ namespace proof_system::plonk { +/** + * @brief UltraPlonK: a flavor of PlonK with plookup tables, efficient range constraints, RAM, ROM, and more! + * + * @warning The proof of PlonK's correctness uses the fact that verifier challenges are generated by a hash function + * standing in for a random oracle. UltraPlonK currently uses a hash function (a custom Pedersen hash that uses lookup + * tables) that is known to violate this random oracle assumption. We plan to switch to an algebraic hash function that + * is believed to have the random oracle property in a future upgrade. + */ class UltraComposer : public ComposerBase { public: @@ -27,7 +35,8 @@ class UltraComposer : public ComposerBase { static constexpr uint32_t UNINITIALIZED_MEMORY_RECORD = UINT32_MAX; static constexpr size_t NUMBER_OF_GATES_PER_RAM_ACCESS = 2; static constexpr size_t NUMBER_OF_ARITHMETIC_GATES_PER_RAM_ARRAY = 1; - + // number of gates created per non-native field operation in process_non_native_field_multiplications + static constexpr size_t GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 7; struct non_native_field_witnesses { // first 4 array elements = limbs // 5th element = prime basis limb @@ -39,6 +48,66 @@ class UltraComposer : public ComposerBase { barretenberg::fr modulus; }; + struct non_native_field_multiplication_cross_terms { + uint32_t lo_0_idx; + uint32_t lo_1_idx; + uint32_t hi_0_idx; + uint32_t hi_1_idx; + uint32_t hi_2_idx; + uint32_t hi_3_idx; + }; + + /** + * @brief Used to store instructions to create non_native_field_multiplication gates. + * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs + * repeatedly. + */ + struct cached_non_native_field_multiplication { + std::array a; + std::array b; + std::array q; + std::array r; + non_native_field_multiplication_cross_terms cross_terms; + std::array neg_modulus; + + bool operator==(const cached_non_native_field_multiplication& other) const + { + bool valid = true; + for (size_t i = 0; i < 5; ++i) { + valid = valid && (a[i] == other.a[i]); + valid = valid && (b[i] == other.b[i]); + valid = valid && (q[i] == other.q[i]); + valid = valid && (r[i] == other.r[i]); + } + return valid; + } + bool operator<(const cached_non_native_field_multiplication& other) const + { + if (a < other.a) { + return true; + } + if (a == other.a) { + if (b < other.b) { + return true; + } + if (b == other.b) { + if (q < other.q) { + return true; + } + if (q == other.q) { + if (r < other.r) { + return true; + } + } + } + } + return false; + } + }; + + std::vector cached_non_native_field_multiplications; + void process_non_native_field_multiplications(); + enum AUX_SELECTORS { NONE, LIMB_ACCUMULATE_1, @@ -242,18 +311,18 @@ class UltraComposer : public ComposerBase { * 1) Current number number of actual gates * 2) Number of public inputs, as we'll need to add a gate for each of them * 3) Number of Rom array-associated gates - * 4) NUmber of range-list associated gates + * 4) Number of range-list associated gates + * 5) Number of non-native field multiplication gates. * * * @param count return arument, number of existing gates * @param rangecount return argument, extra gates due to range checks * @param romcount return argument, extra gates due to rom reads * @param ramcount return argument, extra gates due to ram read/writes + * @param nnfcount return argument, extra gates due to queued non native field gates */ - void get_num_gates_split_into_components(size_t& count, - size_t& rangecount, - size_t& romcount, - size_t& ramcount) const + void get_num_gates_split_into_components( + size_t& count, size_t& rangecount, size_t& romcount, size_t& ramcount, size_t& nnfcount) const { count = num_gates; // each ROM gate adds +1 extra gate due to the rom reads being copied to a sorted list set @@ -321,6 +390,13 @@ class UltraComposer : public ComposerBase { rangecount += ram_range_sizes[i]; } } + std::vector nnf_copy(cached_non_native_field_multiplications); + // update nnfcount + std::sort(nnf_copy.begin(), nnf_copy.end()); + + auto last = std::unique(nnf_copy.begin(), nnf_copy.end()); + const size_t num_nnf_ops = static_cast(std::distance(nnf_copy.begin(), last)); + nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; } /** @@ -328,7 +404,8 @@ class UltraComposer : public ComposerBase { * 1) Current number number of actual gates * 2) Number of public inputs, as we'll need to add a gate for each of them * 3) Number of Rom array-associated gates - * 4) NUmber of range-list associated gates + * 4) Number of range-list associated gates + * 5) Number of non-native field multiplication gates. * * @return size_t */ @@ -342,8 +419,9 @@ class UltraComposer : public ComposerBase { size_t rangecount = 0; size_t romcount = 0; size_t ramcount = 0; - get_num_gates_split_into_components(count, rangecount, romcount, ramcount); - return count + romcount + ramcount + rangecount; + size_t nnfcount = 0; + get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount); + return count + romcount + ramcount + rangecount + nnfcount; } virtual size_t get_total_circuit_size() const override @@ -366,12 +444,13 @@ class UltraComposer : public ComposerBase { size_t rangecount = 0; size_t romcount = 0; size_t ramcount = 0; - - get_num_gates_split_into_components(count, rangecount, romcount, ramcount); + size_t nnfcount = 0; + get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount); size_t total = count + romcount + ramcount + rangecount; std::cout << "gates = " << total << " (arith " << count << ", rom " << romcount << ", ram " << ramcount - << ", range " << rangecount << "), pubinp = " << public_inputs.size() << std::endl; + << ", range " << rangecount << ", non native field gates " << nnfcount + << "), pubinp = " << public_inputs.size() << std::endl; } void assert_equal_constant(const uint32_t a_idx, @@ -464,7 +543,7 @@ class UltraComposer : public ComposerBase { const size_t hi_limb_bits = DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); std::array decompose_non_native_field_double_width_limb( const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); - std::array evaluate_non_native_field_multiplication( + std::array queue_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true); std::array evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses& input); typedef std::pair scaled_witness; diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp index 75d30ce519..ccc4ef0069 100644 --- a/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp +++ b/cpp/src/barretenberg/plonk/composer/ultra_composer.test.cpp @@ -649,7 +649,7 @@ TYPED_TEST(ultra_composer, non_native_field_multiplication) UltraComposer::non_native_field_witnesses inputs{ a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)), }; - const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs); + const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs); composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70); TestFixture::prove_and_verify(composer, /*expected_result=*/true); diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp index d606ab173d..425efbdb58 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.cpp @@ -33,6 +33,7 @@ void UltraCircuitConstructor::finalize_circuit() * our circuit is finalised, and we must not to execute these functions again. */ if (!circuit_finalised) { + process_non_native_field_multiplications(); process_ROM_arrays(public_inputs.size()); process_RAM_arrays(public_inputs.size()); process_range_lists(); @@ -1236,18 +1237,22 @@ std::array UltraCircuitConstructor::decompose_non_native_field_doub } /** - * NON NATIVE FIELD MULTIPLICATION CUSTOM GATE SEQUENCE + * @brief Queue up non-native field multiplication data. * - * This method will evaluate the equation (a * b = q * p + r) - * Where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables + * @details The data queued represents a non-native field multiplication identity a * b = q * p + r, + * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables. + * + * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would + * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove + * duplicates in the circuit finishing stage of the proving key computation. * * The non-native field modulus, p, is a circuit constant * * The return value are the witness indices of the two remainder limbs `lo_1, hi_2` * - * N.B. this method does NOT evaluate the prime field component of non-native field multiplications + * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications. **/ -std::array UltraCircuitConstructor::evaluate_non_native_field_multiplication( +std::array UltraCircuitConstructor::queue_non_native_field_multiplication( const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder) { @@ -1279,8 +1284,6 @@ std::array UltraCircuitConstructor::evaluate_non_native_field_multi constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); - constexpr barretenberg::fr LIMB_RSHIFT = - barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); constexpr barretenberg::fr LIMB_RSHIFT_2 = barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); @@ -1328,83 +1331,127 @@ std::array UltraCircuitConstructor::evaluate_non_native_field_multi range_constrain_two_limbs(input.q[0], input.q[1]); range_constrain_two_limbs(input.q[2], input.q[3]); } + // Add witnesses into the multiplication cache + // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods) + cached_non_native_field_multiplication cache_entry{ + .a = input.a, + .b = input.b, + .q = input.q, + .r = input.r, + .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx }, + .neg_modulus = input.neg_modulus, + }; + cached_non_native_field_multiplications.emplace_back(cache_entry); - // product gate 1 - // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 - create_big_add_gate({ input.q[0], - input.q[1], - input.r[1], - lo_1_idx, - input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, - input.neg_modulus[0] * LIMB_SHIFT, - -LIMB_SHIFT, - -LIMB_SHIFT.sqr(), - 0 }, - true); + return std::array{ lo_1_idx, hi_3_idx }; +} - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[0]); - w_4.emplace_back(lo_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); - ++num_gates; - w_l.emplace_back(input.a[0]); - w_r.emplace_back(input.b[0]); - w_o.emplace_back(input.a[3]); - w_4.emplace_back(input.b[3]); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); - ++num_gates; - w_l.emplace_back(input.a[2]); - w_r.emplace_back(input.b[2]); - w_o.emplace_back(input.r[3]); - w_4.emplace_back(hi_0_idx); - apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); - ++num_gates; - w_l.emplace_back(input.a[1]); - w_r.emplace_back(input.b[1]); - w_o.emplace_back(input.r[2]); - w_4.emplace_back(hi_1_idx); - apply_aux_selectors(AUX_SELECTORS::NONE); - ++num_gates; +/** + * @brief Called in `compute_proving_key` when finalizing circuit. + * Iterates over the cached_non_native_field_multiplication objects, + * removes duplicates, and instantiates the remainder as constraints` + */ +void UltraCircuitConstructor::process_non_native_field_multiplications() +{ + std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - /** - * product gate 6 - * - * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 - * - **/ - create_big_add_gate( - { - input.q[2], - input.q[3], - lo_1_idx, - hi_1_idx, - -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], - -input.neg_modulus[0] * LIMB_SHIFT, - -1, - -1, - 0, - }, - true); + auto last = + std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end()); - /** - * product gate 7 - * - * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b - **/ - create_big_add_gate({ - hi_3_idx, - input.q[0], - input.q[1], - hi_2_idx, - -1, - input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, - input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, - LIMB_RSHIFT_2, - 0, - }); + auto it = cached_non_native_field_multiplications.begin(); - return std::array{ lo_1_idx, hi_3_idx }; + constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS; + constexpr barretenberg::fr LIMB_RSHIFT = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS); + constexpr barretenberg::fr LIMB_RSHIFT_2 = + barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS)); + + // iterate over the cached items and create constraints + while (it != last) { + const auto input = *it; + const uint32_t lo_0_idx = input.cross_terms.lo_0_idx; + const uint32_t lo_1_idx = input.cross_terms.lo_1_idx; + const uint32_t hi_0_idx = input.cross_terms.hi_0_idx; + const uint32_t hi_1_idx = input.cross_terms.hi_1_idx; + const uint32_t hi_2_idx = input.cross_terms.hi_2_idx; + const uint32_t hi_3_idx = input.cross_terms.hi_3_idx; + + // product gate 1 + // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0 + create_big_add_gate({ input.q[0], + input.q[1], + input.r[1], + lo_1_idx, + input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT, + input.neg_modulus[0] * LIMB_SHIFT, + -LIMB_SHIFT, + -LIMB_SHIFT.sqr(), + 0 }, + true); + + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[0]); + w_4.emplace_back(lo_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1); + ++num_gates; + w_l.emplace_back(input.a[0]); + w_r.emplace_back(input.b[0]); + w_o.emplace_back(input.a[3]); + w_4.emplace_back(input.b[3]); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2); + ++num_gates; + w_l.emplace_back(input.a[2]); + w_r.emplace_back(input.b[2]); + w_o.emplace_back(input.r[3]); + w_4.emplace_back(hi_0_idx); + apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3); + ++num_gates; + w_l.emplace_back(input.a[1]); + w_r.emplace_back(input.b[1]); + w_o.emplace_back(input.r[2]); + w_4.emplace_back(hi_1_idx); + apply_aux_selectors(AUX_SELECTORS::NONE); + ++num_gates; + + /** + * product gate 6 + * + * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0 + * + **/ + create_big_add_gate( + { + input.q[2], + input.q[3], + lo_1_idx, + hi_1_idx, + -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0], + -input.neg_modulus[0] * LIMB_SHIFT, + -1, + -1, + 0, + }, + true); + + /** + * product gate 7 + * + * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b + **/ + create_big_add_gate({ + hi_3_idx, + input.q[0], + input.q[1], + hi_2_idx, + -1, + input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2, + input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2, + LIMB_RSHIFT_2, + 0, + }); + ++it; + } } /** diff --git a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp index 383dccb6f0..2e4803fa44 100644 --- a/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp +++ b/cpp/src/barretenberg/proof_system/circuit_constructors/ultra_circuit_constructor.hpp @@ -29,6 +29,8 @@ static constexpr size_t DEFAULT_NON_NATIVE_FIELD_LIMB_BITS = 68; static constexpr uint32_t UNINITIALIZED_MEMORY_RECORD = UINT32_MAX; static constexpr size_t NUMBER_OF_GATES_PER_RAM_ACCESS = 2; static constexpr size_t NUMBER_OF_ARITHMETIC_GATES_PER_RAM_ARRAY = 1; +// number of gates created per non-native field operation in process_non_native_field_multiplications +static constexpr size_t GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC = 7; struct non_native_field_witnesses { // first 4 array elements = limbs @@ -41,6 +43,63 @@ struct non_native_field_witnesses { barretenberg::fr modulus; }; +struct non_native_field_multiplication_cross_terms { + uint32_t lo_0_idx; + uint32_t lo_1_idx; + uint32_t hi_0_idx; + uint32_t hi_1_idx; + uint32_t hi_2_idx; + uint32_t hi_3_idx; +}; + +/** + * @brief Used to store instructions to create non_native_field_multiplication gates. + * We want to cache these (and remove duplicates) as the stdlib code can end up multiplying the same inputs + * repeatedly. + */ +struct cached_non_native_field_multiplication { + std::array a; + std::array b; + std::array q; + std::array r; + non_native_field_multiplication_cross_terms cross_terms; + std::array neg_modulus; + + bool operator==(const cached_non_native_field_multiplication& other) const + { + bool valid = true; + for (size_t i = 0; i < 5; ++i) { + valid = valid && (a[i] == other.a[i]); + valid = valid && (b[i] == other.b[i]); + valid = valid && (q[i] == other.q[i]); + valid = valid && (r[i] == other.r[i]); + } + return valid; + } + bool operator<(const cached_non_native_field_multiplication& other) const + { + if (a < other.a) { + return true; + } + if (a == other.a) { + if (b < other.b) { + return true; + } + if (b == other.b) { + if (q < other.q) { + return true; + } + if (q == other.q) { + if (r < other.r) { + return true; + } + } + } + } + return false; + } +}; + enum AUX_SELECTORS { NONE, LIMB_ACCUMULATE_1, @@ -201,6 +260,10 @@ class UltraCircuitConstructor : public CircuitConstructorBase memory_write_records; + std::vector cached_non_native_field_multiplications; + + void process_non_native_field_multiplications(); + bool circuit_finalised = false; UltraCircuitConstructor(const size_t size_hint = 0) @@ -286,18 +349,18 @@ class UltraCircuitConstructor : public CircuitConstructorBase nnf_copy(cached_non_native_field_multiplications); + // // update nnfcount + // std::sort(nnf_copy.begin(), nnf_copy.end()); + + // auto last = std::unique(nnf_copy.begin(), nnf_copy.end()); + // const size_t num_nnf_ops = static_cast(std::distance(nnf_copy.begin(), last)); + // nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC; // } // /** @@ -373,7 +443,8 @@ class UltraCircuitConstructor : public CircuitConstructorBase