Skip to content

Commit

Permalink
Zw/recursion constraint reduction (#377)
Browse files Browse the repository at this point in the history
* removed blake3s hash from ultraplonk recursive prover
* UltraComposer will now not create duplicate non-native field multiplication constraints
* Propagate new stuff to Honk and splitting_tmp.
* Clean up and add comments.
---------

Co-authored-by: codygunton <[email protected]>
  • Loading branch information
zac-williamson and codygunton authored Apr 24, 2023
1 parent 87aeb37 commit acc6b95
Show file tree
Hide file tree
Showing 13 changed files with 520 additions and 215 deletions.
5 changes: 2 additions & 3 deletions cpp/src/barretenberg/honk/composer/ultra_honk_composer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,11 +357,10 @@ class UltraHonkComposer {
};
// std::array<uint32_t, 2> decompose_non_native_field_double_width_limb(
// const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
std::array<uint32_t, 2> evaluate_non_native_field_multiplication(
std::array<uint32_t, 2> queue_non_native_field_multiplication(
const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true)
{
return circuit_constructor.evaluate_non_native_field_multiplication(input,
range_constrain_quotient_and_remainder);
return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder);
};
// std::array<uint32_t, 2> evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses&
// input); typedef std::pair<uint32_t, barretenberg::fr> scaled_witness; typedef std::tuple<scaled_witness,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ TEST(UltraHonkComposer, non_native_field_multiplication)
proof_system::non_native_field_witnesses inputs{
a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
};
const auto [lo_1_idx, hi_1_idx] = honk_composer.evaluate_non_native_field_multiplication(inputs);
const auto [lo_1_idx, hi_1_idx] = honk_composer.queue_non_native_field_multiplication(inputs);
honk_composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);
}
{
Expand Down Expand Up @@ -798,7 +798,7 @@ TEST(UltraHonkComposer, non_native_field_multiplication)
proof_system::plonk::UltraComposer::non_native_field_witnesses inputs{
a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
};
const auto [lo_1_idx, hi_1_idx] = plonk_composer.evaluate_non_native_field_multiplication(inputs);
const auto [lo_1_idx, hi_1_idx] = plonk_composer.queue_non_native_field_multiplication(inputs);
plonk_composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,18 +133,18 @@ class UltraPlonkComposer {
// * 1) Current number of actual gates
// * 2) Number of public inputs, as we'll need to add a gate for each of them
// * 3) Number of Rom array-associated gates
// * 4) NUmber of range-list associated gates
// * 4) Number of range-list associated gates
// * 5) Number of non-native field multiplication gates.
// *
// *
// * @param count return argument, number of existing gates
// * @param rangecount return argument, extra gates due to range checks
// * @param romcount return argument, extra gates due to rom reads
// * @param ramcount return argument, extra gates due to ram read/writes
// * @param nnfcount return argument, extra gates due to queued non native field gates
// */
// void get_num_gates_split_into_components(size_t& count,
// size_t& rangecount,
// size_t& romcount,
// size_t& ramcount) const
// void get_num_gates_split_into_components(
// size_t& count, size_t& rangecount, size_t& romcount, size_t& ramcount, size_t& nnfcount) const
// {
// count = num_gates;
// // each ROM gate adds +1 extra gate due to the rom reads being copied to a sorted list set
Expand Down Expand Up @@ -213,17 +213,27 @@ class UltraPlonkComposer {
// rangecount += ram_range_sizes[i];
// }
// }
// std::vector<cached_non_native_field_multiplication> nnf_copy(cached_non_native_field_multiplications);
// // update nnfcount
// std::sort(nnf_copy.begin(), nnf_copy.end());

// auto last = std::unique(nnf_copy.begin(), nnf_copy.end());
// const size_t num_nnf_ops = static_cast<size_t>(std::distance(nnf_copy.begin(), last));
// nnfcount = num_nnf_ops * GATES_PER_NON_NATIVE_FIELD_MULTIPLICATION_ARITHMETIC;
// }
//

// /**
// * @brief Get the final number of gates in a circuit, which consists of the sum of:
// * 1) Current number of actual gates
// * 2) Number of public inputs, as we'll need to add a gate for each of them
// * 3) Number of Rom array-associated gates
// * 4) NUmber of range-list associated gates
// * 4) Number of range-list associated gates
// * 5) Number of non-native field multiplication gates.
// *
// * @return size_t
// */
//
// virtual size_t get_num_gates() const override
// {
// // if circuit finalised already added extra gates
Expand All @@ -234,8 +244,9 @@ class UltraPlonkComposer {
// size_t rangecount = 0;
// size_t romcount = 0;
// size_t ramcount = 0;
// get_num_gates_split_into_components(count, rangecount, romcount, ramcount);
// return count + romcount + ramcount + rangecount;
// size_t nnfcount = 0;
// get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount);
// return count + romcount + ramcount + rangecount + nnfcount;
// }

// virtual void print_num_gates() const override
Expand All @@ -244,12 +255,13 @@ class UltraPlonkComposer {
// size_t rangecount = 0;
// size_t romcount = 0;
// size_t ramcount = 0;

// get_num_gates_split_into_components(count, rangecount, romcount, ramcount);
// size_t nnfcount = 0;
// get_num_gates_split_into_components(count, rangecount, romcount, ramcount, nnfcount);

// size_t total = count + romcount + ramcount + rangecount;
// std::cout << "gates = " << total << " (arith " << count << ", rom " << romcount << ", ram " << ramcount
// << ", range " << rangecount << "), pubinp = " << public_inputs.size() << std::endl;
// << ", range " << rangecount << ", non native field gates " << nnfcount
// << "), pubinp = " << public_inputs.size() << std::endl;
// }

void assert_equal(const uint32_t a_variable_idx,
Expand Down Expand Up @@ -367,11 +379,10 @@ class UltraPlonkComposer {
};
// std::array<uint32_t, 2> decompose_non_native_field_double_width_limb(
// const uint32_t limb_idx, const size_t num_limb_bits = (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));
std::array<uint32_t, 2> evaluate_non_native_field_multiplication(
std::array<uint32_t, 2> queue_non_native_field_multiplication(
const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder = true)
{
return circuit_constructor.evaluate_non_native_field_multiplication(input,
range_constrain_quotient_and_remainder);
return circuit_constructor.queue_non_native_field_multiplication(input, range_constrain_quotient_and_remainder);
};
// std::array<uint32_t, 2> evaluate_partial_non_native_field_multiplication(const non_native_field_witnesses&
// input); typedef std::pair<uint32_t, barretenberg::fr> scaled_witness; typedef std::tuple<scaled_witness,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -783,7 +783,7 @@ TEST(ultra_plonk_composer_splitting_tmp, non_native_field_multiplication)
non_native_field_witnesses inputs{
a_indices, b_indices, q_indices, r_indices, modulus_limbs, fr(uint256_t(modulus)),
};
const auto [lo_1_idx, hi_1_idx] = composer.evaluate_non_native_field_multiplication(inputs);
const auto [lo_1_idx, hi_1_idx] = composer.queue_non_native_field_multiplication(inputs);
composer.range_constrain_two_limbs(lo_1_idx, hi_1_idx, 70, 70);

auto prover = composer.create_prover();
Expand Down
206 changes: 127 additions & 79 deletions cpp/src/barretenberg/plonk/composer/ultra_composer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ std::shared_ptr<proving_key> UltraComposer::compute_proving_key()
* our circuit is finalised, and we must not execute these functions again.
*/
if (!circuit_finalised) {
process_non_native_field_multiplications();
process_ROM_arrays(public_inputs.size());
process_RAM_arrays(public_inputs.size());
process_range_lists();
Expand Down Expand Up @@ -1846,18 +1847,22 @@ std::array<uint32_t, 2> UltraComposer::decompose_non_native_field_double_width_l
}

/**
* NON NATIVE FIELD MULTIPLICATION CUSTOM GATE SEQUENCE
* @brief Queue up non-native field multiplication data.
*
* This method will evaluate the equation (a * b = q * p + r)
* Where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables
* @details The data queued represents a non-native field multiplication identity a * b = q * p + r,
* where a, b, q, r are all emulated non-native field elements that are each split across 4 distinct witness variables.
*
* Without this queue some functions, such as proof_system::plonk::stdlib::element::double_montgomery_ladder, would
* duplicate non-native field operations, which can be quite expensive. We queue up these operations, and remove
* duplicates in the circuit finishing stage of the proving key computation.
*
* The non-native field modulus, p, is a circuit constant
*
* The return values are the witness indices of the two remainder limbs `lo_1, hi_3`
*
* N.B. this method does NOT evaluate the prime field component of non-native field multiplications
* N.B.: This method does NOT evaluate the prime field component of non-native field multiplications.
**/
std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
std::array<uint32_t, 2> UltraComposer::queue_non_native_field_multiplication(
const non_native_field_witnesses& input, const bool range_constrain_quotient_and_remainder)
{

Expand Down Expand Up @@ -1889,8 +1894,6 @@ std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS;
constexpr barretenberg::fr LIMB_SHIFT_2 = uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
constexpr barretenberg::fr LIMB_SHIFT_3 = uint256_t(1) << (3 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
constexpr barretenberg::fr LIMB_RSHIFT =
barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
constexpr barretenberg::fr LIMB_RSHIFT_2 =
barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));

Expand Down Expand Up @@ -1939,82 +1942,127 @@ std::array<uint32_t, 2> UltraComposer::evaluate_non_native_field_multiplication(
range_constrain_two_limbs(input.q[2], input.q[3]);
}

// product gate 1
// (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0
create_big_add_gate({ input.q[0],
input.q[1],
input.r[1],
lo_1_idx,
input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT,
input.neg_modulus[0] * LIMB_SHIFT,
-LIMB_SHIFT,
-LIMB_SHIFT.sqr(),
0 },
true);
// Add witnesses into the multiplication cache
// (when finalising the circuit, we will remove duplicates; several dups produced by biggroup.hpp methods)
cached_non_native_field_multiplication cache_entry{
.a = input.a,
.b = input.b,
.q = input.q,
.r = input.r,
.cross_terms = { lo_0_idx, lo_1_idx, hi_0_idx, hi_1_idx, hi_2_idx, hi_3_idx },
.neg_modulus = input.neg_modulus,
};
cached_non_native_field_multiplications.emplace_back(cache_entry);

w_l.emplace_back(input.a[1]);
w_r.emplace_back(input.b[1]);
w_o.emplace_back(input.r[0]);
w_4.emplace_back(lo_0_idx);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1);
++num_gates;
w_l.emplace_back(input.a[0]);
w_r.emplace_back(input.b[0]);
w_o.emplace_back(input.a[3]);
w_4.emplace_back(input.b[3]);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2);
++num_gates;
w_l.emplace_back(input.a[2]);
w_r.emplace_back(input.b[2]);
w_o.emplace_back(input.r[3]);
w_4.emplace_back(hi_0_idx);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3);
++num_gates;
w_l.emplace_back(input.a[1]);
w_r.emplace_back(input.b[1]);
w_o.emplace_back(input.r[2]);
w_4.emplace_back(hi_1_idx);
apply_aux_selectors(AUX_SELECTORS::NONE);
++num_gates;
return std::array<uint32_t, 2>{ lo_1_idx, hi_3_idx };
}

/**
* product gate 6
*
* hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0
*
**/
create_big_add_gate(
{
input.q[2],
input.q[3],
lo_1_idx,
hi_1_idx,
-input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0],
-input.neg_modulus[0] * LIMB_SHIFT,
-1,
-1,
0,
},
true);
/**
* @brief Called in `compute_proving_key` when finalizing circuit.
* Iterates over the cached_non_native_field_multiplication objects,
* removes duplicates, and instantiates the remainder as constraints.
*/
void UltraComposer::process_non_native_field_multiplications()
{
std::sort(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end());

/**
* product gate 7
*
* hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b
**/
create_big_add_gate({
hi_3_idx,
input.q[0],
input.q[1],
hi_2_idx,
-1,
input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2,
input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2,
LIMB_RSHIFT_2,
0,
});
auto last =
std::unique(cached_non_native_field_multiplications.begin(), cached_non_native_field_multiplications.end());

return std::array<uint32_t, 2>{ lo_1_idx, hi_3_idx };
auto it = cached_non_native_field_multiplications.begin();

constexpr barretenberg::fr LIMB_SHIFT = uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS;
constexpr barretenberg::fr LIMB_RSHIFT =
barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << DEFAULT_NON_NATIVE_FIELD_LIMB_BITS);
constexpr barretenberg::fr LIMB_RSHIFT_2 =
barretenberg::fr(1) / barretenberg::fr(uint256_t(1) << (2 * DEFAULT_NON_NATIVE_FIELD_LIMB_BITS));

// iterate over the cached items and create constraints
while (it != last) {
const auto input = *it;
const uint32_t lo_0_idx = input.cross_terms.lo_0_idx;
const uint32_t lo_1_idx = input.cross_terms.lo_1_idx;
const uint32_t hi_0_idx = input.cross_terms.hi_0_idx;
const uint32_t hi_1_idx = input.cross_terms.hi_1_idx;
const uint32_t hi_2_idx = input.cross_terms.hi_2_idx;
const uint32_t hi_3_idx = input.cross_terms.hi_3_idx;

// product gate 1
// (lo_0 + q_0(p_0 + p_1*2^b) + q_1(p_0*2^b) - (r_1)2^b)2^-2b - lo_1 = 0
create_big_add_gate({ input.q[0],
input.q[1],
input.r[1],
lo_1_idx,
input.neg_modulus[0] + input.neg_modulus[1] * LIMB_SHIFT,
input.neg_modulus[0] * LIMB_SHIFT,
-LIMB_SHIFT,
-LIMB_SHIFT.sqr(),
0 },
true);

w_l.emplace_back(input.a[1]);
w_r.emplace_back(input.b[1]);
w_o.emplace_back(input.r[0]);
w_4.emplace_back(lo_0_idx);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_1);
++num_gates;
w_l.emplace_back(input.a[0]);
w_r.emplace_back(input.b[0]);
w_o.emplace_back(input.a[3]);
w_4.emplace_back(input.b[3]);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_2);
++num_gates;
w_l.emplace_back(input.a[2]);
w_r.emplace_back(input.b[2]);
w_o.emplace_back(input.r[3]);
w_4.emplace_back(hi_0_idx);
apply_aux_selectors(AUX_SELECTORS::NON_NATIVE_FIELD_3);
++num_gates;
w_l.emplace_back(input.a[1]);
w_r.emplace_back(input.b[1]);
w_o.emplace_back(input.r[2]);
w_4.emplace_back(hi_1_idx);
apply_aux_selectors(AUX_SELECTORS::NONE);
++num_gates;

/**
* product gate 6
*
* hi_2 - hi_1 - lo_1 - q[2](p[1].2^b + p[0]) - q[3](p[0].2^b) = 0
*
**/
create_big_add_gate(
{
input.q[2],
input.q[3],
lo_1_idx,
hi_1_idx,
-input.neg_modulus[1] * LIMB_SHIFT - input.neg_modulus[0],
-input.neg_modulus[0] * LIMB_SHIFT,
-1,
-1,
0,
},
true);

/**
* product gate 7
*
* hi_3 - (hi_2 - q[0](p[3].2^b + p[2]) - q[1](p[2].2^b + p[1])).2^-2b
**/
create_big_add_gate({
hi_3_idx,
input.q[0],
input.q[1],
hi_2_idx,
-1,
input.neg_modulus[3] * LIMB_RSHIFT + input.neg_modulus[2] * LIMB_RSHIFT_2,
input.neg_modulus[2] * LIMB_RSHIFT + input.neg_modulus[1] * LIMB_RSHIFT_2,
LIMB_RSHIFT_2,
0,
});
++it;
}
}

/**
Expand Down
Loading

0 comments on commit acc6b95

Please sign in to comment.