Zw/recursion constraint reduction (#377)

* removed blake3s hash from ultraplonk recursive prover
* UltraComposer will now not create duplicate non-native field multiplication constraints
* Propagate new stuff to Honk and splitting_tmp.
* Clean up and add comments.

---------

Co-authored-by: codygunton <[email protected]>
AztecProtocol · Apr 24, 2023 · acc6b95 · acc6b95
1 parent 87aeb37
commit acc6b95
Show file tree

Hide file tree

Showing 13 changed files with 520 additions and 215 deletions.
diff --git a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp
@@ -357,11 +357,10 @@ class UltraHonkComposer {
     };
     // std::array<uint32_t, 2> decompose_non_native_field_double_width_limb(
     //     const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
-    std::array<uint32_t, 2> evaluate_non_native_field_multiplication(
+    std::array<uint32_t, 2> queue_non_native_field_multiplication(
         const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true)
     {
-        return circuit_constructor.evaluate_non_native_field_multiplication(input,
-                                                                            range_constrain_quotient_and_remainder);
+        return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder);
     };
     // std::array<uint32_t, 2> evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses&
     // input); typedef std::pair<uint32_t, barretenberg::fr> scaled_witness; typedef std::tuple<scaled_witness,

diff --git a/cpp/src/barretenberg/honk/composer/ultra_honk_composer.test.cpp b/cpp/src/barretenberg/honk/composer/ultra_honk_composer.test.cpp
@@ -751,7 +751,7 @@ TEST(UltraHonkComposer, non_native_field_multiplication)
         proof_system::non_native_field_witnesses inputs{
             a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
         };
-        const auto [lo_1_idx, hi_1_idx] = honk_composer.evaluate_non_native_field_multiplication(inputs);
+        const auto [lo_1_idx, hi_1_idx] = honk_composer.queue_non_native_field_multiplication(inputs);
         honk_composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);
     }
     {
@@ -798,7 +798,7 @@ TEST(UltraHonkComposer, non_native_field_multiplication)
         proof_system::plonk::UltraComposer::non_native_field_witnesses inputs{
             a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
         };
-        const auto [lo_1_idx, hi_1_idx] = plonk_composer.evaluate_non_native_field_multiplication(inputs);
+        const auto [lo_1_idx, hi_1_idx] = plonk_composer.queue_non_native_field_multiplication(inputs);
         plonk_composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);
     }
 

diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.hpp
@@ -133,18 +133,18 @@ class UltraPlonkComposer {
     //  * 1) Current number number of actual gates
     //  * 2) Number of public inputs, as we'll need to add a gate for each of them
     //  * 3) Number of Rom array-associated gates
-    //  * 4) NUmber of range-list associated gates
+    //  * 4) Number of range-list associated gates
+    //  * 5) Number of non-native field multiplication gates.
     //  *
     //  *
     //  * @param count return arument, number of existing gates
     //  * @param rangecount return argument, extra gates due to range checks
     //  * @param romcount return argument, extra gates due to rom reads
     //  * @param ramcount return argument, extra gates due to ram read/writes
+    //  * @param nnfcount return argument, extra gates due to queued non native field gates
     //  */
-    // void get_num_gates_split_into_components(size_t& count,
-    //                                          size_t& rangecount,
-    //                                          size_t& romcount,
-    //                                          size_t& ramcount) const
+    // void get_num_gates_split_into_components(
+    //     size_t& count, size_t& rangecount, size_t& romcount, size_t& ramcount, size_t& nnfcount) const
     // {
     //     count = num_gates;
     //     // each ROM gate adds +1 extra gate due to the rom reads being copied to a sorted list set
@@ -213,17 +213,27 @@ class UltraPlonkComposer {
     //             rangecount += ram_range_sizes[i];
     //         }
     //     }
+    //     std::vector<cached_non_native_field_multiplication> nnf_copy(cached_non_native_field_multiplications);
+    //     // update nnfcount
+    //     std::sort(nnf_copy.begin(), nnf_copy.end());
+
+    //     auto last = std::unique(nnf_copy.begin(), nnf_copy.end());
+    //     const size_t num_nnf_ops = static_cast<size_t>(std::distance(nnf_copy.begin(), last));
+    //     nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC;
     // }
+    //
 
     // /**
     //  * @brief Get the final number of gates in a circuit, which consists of the sum of:
     //  * 1) Current number number of actual gates
     //  * 2) Number of public inputs, as we'll need to add a gate for each of them
     //  * 3) Number of Rom array-associated gates
-    //  * 4) NUmber of range-list associated gates
+    //  * 4) Number of range-list associated gates
+    //  * 5) Number of non-native field multiplication gates.
     //  *
     //  * @return size_t
     //  */
+    //
     // virtual size_t get_num_gates() const override
     // {
     //     // if circuit finalised already added extra gates
@@ -234,8 +244,9 @@ class UltraPlonkComposer {
     //     size_t rangecount = 0;
     //     size_t romcount = 0;
     //     size_t ramcount = 0;
-    //     get_num_gates_split_into_components(count, rangecount, romcount, ramcount);
-    //     return count + romcount + ramcount + rangecount;
+    //     size_t nnfcount = 0;
+    //     get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount);
+    //     return count + romcount + ramcount + rangecount + nnfcount;
     // }
 
     // virtual void print_num_gates() const override
@@ -244,12 +255,13 @@ class UltraPlonkComposer {
     //     size_t rangecount = 0;
     //     size_t romcount = 0;
     //     size_t ramcount = 0;
-
-    //     get_num_gates_split_into_components(count, rangecount, romcount, ramcount);
+    //     size_t nnfcount = 0;
+    //     get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount);
 
     //     size_t total = count + romcount + ramcount + rangecount;
     //     std::cout << "gates = " << total << " (arith " << count << ", rom " << romcount << ", ram " << ramcount
-    //               << ", range " << rangecount << "), pubinp = " << public_inputs.size() << std::endl;
+    //               << ", range " << rangecount << ", non native field gates " << nnfcount
+    //               << "), pubinp = " << public_inputs.size() << std::endl;
     // }
 
     void assert_equal(const uint32_t a_variable_idx,
@@ -367,11 +379,10 @@ class UltraPlonkComposer {
     };
     // std::array<uint32_t, 2> decompose_non_native_field_double_width_limb(
     //     const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
-    std::array<uint32_t, 2> evaluate_non_native_field_multiplication(
+    std::array<uint32_t, 2> queue_non_native_field_multiplication(
         const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true)
     {
-        return circuit_constructor.evaluate_non_native_field_multiplication(input,
-                                                                            range_constrain_quotient_and_remainder);
+        return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder);
     };
     // std::array<uint32_t, 2> evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses&
     // input); typedef std::pair<uint32_t, barretenberg::fr> scaled_witness; typedef std::tuple<scaled_witness,

diff --git a/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp b/cpp/src/barretenberg/plonk/composer/splitting_tmp/ultra_plonk_composer.test.cpp
@@ -783,7 +783,7 @@ TEST(ultra_plonk_composer_splitting_tmp, non_native_field_multiplication)
     non_native_field_witnesses inputs{
         a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
     };
-    const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs);
+    const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs);
     composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);
 
     auto prover = composer.create_prover();

diff --git a/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp b/cpp/src/barretenberg/plonk/composer/ultra_composer.cpp
@@ -531,6 +531,7 @@ std::shared_ptr<proving_key> UltraComposer::compute_proving_key()
      * our circuit is finalised, and we must not to execute these functions again.
      */
     if (!circuit_finalised) {
+        process_non_native_field_multiplications();
         process_ROM_arrays(public_inputs.size());
         process_RAM_arrays(public_inputs.size());
         process_range_lists();
@@ -1846,18 +1847,22 @@ std::array<uint32_t, 2> UltraComposer::decompose_non_native_field_double_width_l
 }
 
 /**
- * NON NATIVE FIELD MULTIPLICATION CUSTOM GATE SEQUENCE
+ * @brief Queue up non-native field multiplication data.
  *
- * This method will evaluate the equation (a * b = q * p + r)
- * Where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables
+ * @details The data queued represents a non-native field multiplication identity a * b = q * p + r,
+ * where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables.
+ *
+ * Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would
+ * duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove
+ * duplicates in the circuit finishing stage of the proving key computation.
  *
  * The non-native field modulus, p, is a circuit constant
  *
  * The return value are the witness indices of the two remainder limbs `lo_1, hi_2`
  *
- * N.B. this method does NOT evaluate the prime field component of non-native field multiplications
+ * N.B.: This method does NOT evaluate the prime field component of non-native field multiplications.
  **/
-std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
+std::array<uint32_t, 2> UltraComposer::queue_non_native_field_multiplication(
     const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder)
 {
 
@@ -1889,8 +1894,6 @@ std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
     constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS;
     constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
     constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
-    constexpr barretenberg::fr LIMB_RSHIFT =
-        barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
     constexpr barretenberg::fr LIMB_RSHIFT_2 =
         barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
 
@@ -1939,82 +1942,127 @@ std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
         range_constrain_two_limbs(input.q[2], input.q[3]);
     }
 
-    // product gate 1
-    // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0
-    create_big_add_gate({ input.q[0],
-                          input.q[1],
-                          input.r[1],
-                          lo_1_idx,
-                          input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT,
-                          input.neg_modulus[0] * LIMB_SHIFT,
-                          -LIMB_SHIFT,
-                          -LIMB_SHIFT.sqr(),
-                          0 },
-                        true);
+    // Add witnesses into the multiplication cache
+    // (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods)
+    cached_non_native_field_multiplication cache_entry{
+        .a = input.a,
+        .b = input.b,
+        .q = input.q,
+        .r = input.r,
+        .cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx },
+        .neg_modulus = input.neg_modulus,
+    };
+    cached_non_native_field_multiplications.emplace_back(cache_entry);
 
-    w_l.emplace_back(input.a[1]);
-    w_r.emplace_back(input.b[1]);
-    w_o.emplace_back(input.r[0]);
-    w_4.emplace_back(lo_0_idx);
-    apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1);
-    ++num_gates;
-    w_l.emplace_back(input.a[0]);
-    w_r.emplace_back(input.b[0]);
-    w_o.emplace_back(input.a[3]);
-    w_4.emplace_back(input.b[3]);
-    apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2);
-    ++num_gates;
-    w_l.emplace_back(input.a[2]);
-    w_r.emplace_back(input.b[2]);
-    w_o.emplace_back(input.r[3]);
-    w_4.emplace_back(hi_0_idx);
-    apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3);
-    ++num_gates;
-    w_l.emplace_back(input.a[1]);
-    w_r.emplace_back(input.b[1]);
-    w_o.emplace_back(input.r[2]);
-    w_4.emplace_back(hi_1_idx);
-    apply_aux_selectors(AUX_SELECTORS::NONE);
-    ++num_gates;
+    return std::array<uint32_t, 2>{ lo_1_idx, hi_3_idx };
+}
 
-    /**
-     * product gate 6
-     *
-     * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0
-     *
-     **/
-    create_big_add_gate(
-        {
-            input.q[2],
-            input.q[3],
-            lo_1_idx,
-            hi_1_idx,
-            -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0],
-            -input.neg_modulus[0] * LIMB_SHIFT,
-            -1,
-            -1,
-            0,
-        },
-        true);
+/**
+ * @brief Called in `compute_proving_key` when finalizing circuit.
+ * Iterates over the cached_non_native_field_multiplication objects,
+ * removes duplicates, and instantiates the remainder as constraints.
+ */
+void UltraComposer::process_non_native_field_multiplications()
+{
+    std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end());
 
-    /**
-     * product gate 7
-     *
-     * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b
-     **/
-    create_big_add_gate({
-        hi_3_idx,
-        input.q[0],
-        input.q[1],
-        hi_2_idx,
-        -1,
-        input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2,
-        input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2,
-        LIMB_RSHIFT_2,
-        0,
-    });
+    auto last =
+        std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end());
 
-    return std::array<uint32_t, 2>{ lo_1_idx, hi_3_idx };
+    auto it = cached_non_native_field_multiplications.begin();
+
+    constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS;
+    constexpr barretenberg::fr LIMB_RSHIFT =
+        barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
+    constexpr barretenberg::fr LIMB_RSHIFT_2 =
+        barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
+
+    // iterate over the cached items and create constraints
+    while (it != last) {
+        const auto input = *it;
+        const uint32_t lo_0_idx = input.cross_terms.lo_0_idx;
+        const uint32_t lo_1_idx = input.cross_terms.lo_1_idx;
+        const uint32_t hi_0_idx = input.cross_terms.hi_0_idx;
+        const uint32_t hi_1_idx = input.cross_terms.hi_1_idx;
+        const uint32_t hi_2_idx = input.cross_terms.hi_2_idx;
+        const uint32_t hi_3_idx = input.cross_terms.hi_3_idx;
+
+        // product gate 1
+        // (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0
+        create_big_add_gate({ input.q[0],
+                              input.q[1],
+                              input.r[1],
+                              lo_1_idx,
+                              input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT,
+                              input.neg_modulus[0] * LIMB_SHIFT,
+                              -LIMB_SHIFT,
+                              -LIMB_SHIFT.sqr(),
+                              0 },
+                            true);
+
+        w_l.emplace_back(input.a[1]);
+        w_r.emplace_back(input.b[1]);
+        w_o.emplace_back(input.r[0]);
+        w_4.emplace_back(lo_0_idx);
+        apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1);
+        ++num_gates;
+        w_l.emplace_back(input.a[0]);
+        w_r.emplace_back(input.b[0]);
+        w_o.emplace_back(input.a[3]);
+        w_4.emplace_back(input.b[3]);
+        apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2);
+        ++num_gates;
+        w_l.emplace_back(input.a[2]);
+        w_r.emplace_back(input.b[2]);
+        w_o.emplace_back(input.r[3]);
+        w_4.emplace_back(hi_0_idx);
+        apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3);
+        ++num_gates;
+        w_l.emplace_back(input.a[1]);
+        w_r.emplace_back(input.b[1]);
+        w_o.emplace_back(input.r[2]);
+        w_4.emplace_back(hi_1_idx);
+        apply_aux_selectors(AUX_SELECTORS::NONE);
+        ++num_gates;
+
+        /**
+         * product gate 6
+         *
+         * hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0
+         *
+         **/
+        create_big_add_gate(
+            {
+                input.q[2],
+                input.q[3],
+                lo_1_idx,
+                hi_1_idx,
+                -input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0],
+                -input.neg_modulus[0] * LIMB_SHIFT,
+                -1,
+                -1,
+                0,
+            },
+            true);
+
+        /**
+         * product gate 7
+         *
+         * hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b
+         **/
+        create_big_add_gate({
+            hi_3_idx,
+            input.q[0],
+            input.q[1],
+            hi_2_idx,
+            -1,
+            input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2,
+            input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2,
+            LIMB_RSHIFT_2,
+            0,
+        });
+        ++it;
+    }
 }
 
 /**