From 7fa8f844d9e389f8636118f1d3c3ecce707e771e Mon Sep 17 00:00:00 2001 From: maramihali Date: Mon, 9 Dec 2024 09:53:47 +0000 Subject: [PATCH] chore: Optimise grand product computation round based on active ranges (#10460) Skip computation of numerator and denominator of z_perm at indexes that are not part of the active ranges, and refine the threshold in commit_structured for `z_perm`. New benchmarks: **Now: 5.6 s difference** We still see a difference between committing to z_perm between an ambient trace of 2^19 and 2^20, caused by the fact that the active ranges complement are larger (i.e. the ranges in the trace blocks where z_perm is constant) because the blocks themselves are larger. We make sure to at least avoid computing and committing to z_perm after the final active wire index. --- .../commitment_schemes/commitment_key.hpp | 59 ++++++++++--------- .../batched_affine_addition.cpp | 1 + .../barretenberg/ecc/fields/field_impl.hpp | 3 +- .../cpp/src/barretenberg/flavor/flavor.hpp | 2 +- .../execution_trace_usage_tracker.hpp | 1 + .../library/grand_product_library.hpp | 36 +++++++---- .../relations/databus_lookup_relation.hpp | 1 + .../relations/logderiv_lookup_relation.hpp | 1 + .../stdlib_circuit_builders/mega_flavor.hpp | 4 +- .../stdlib_circuit_builders/ultra_flavor.hpp | 2 +- .../trace_to_polynomials.cpp | 1 + .../barretenberg/ultra_honk/oink_prover.cpp | 5 +- .../barretenberg/ultra_honk/oink_prover.hpp | 7 ++- 13 files changed, 79 insertions(+), 44 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp index 6440c016a62..81ef54a2b9d 100644 --- a/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp +++ b/barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp @@ -85,7 +85,7 @@ template class CommitmentKey { */ Commitment commit(PolynomialSpan polynomial) { - PROFILE_THIS(); + PROFILE_THIS_NAME("commit"); // We must have a power-of-2 SRS points *after* subtracting by start_index. size_t dyadic_poly_size = numeric::round_up_power_2(polynomial.size()); // Because pippenger prefers a power-of-2 size, we must choose a starting index for the points so that we don't @@ -133,7 +133,7 @@ template class CommitmentKey { */ Commitment commit_sparse(PolynomialSpan polynomial) { - PROFILE_THIS(); + PROFILE_THIS_NAME("commit_sparse"); const size_t poly_size = polynomial.size(); ASSERT(polynomial.end_index() <= srs->get_monomial_size()); @@ -204,21 +204,24 @@ template class CommitmentKey { * @return Commitment */ Commitment commit_structured(PolynomialSpan polynomial, - const std::vector>& active_ranges) + const std::vector>& active_ranges, + size_t final_active_wire_idx = 0) { - BB_OP_COUNT_TIME(); + PROFILE_THIS_NAME("commit_structured"); ASSERT(polynomial.end_index() <= srs->get_monomial_size()); // Percentage of nonzero coefficients beyond which we resort to the conventional commit method constexpr size_t NONZERO_THRESHOLD = 75; + // Compute the number of non-zero coefficients in the polynomial size_t total_num_scalars = 0; - for (const auto& range : active_ranges) { - total_num_scalars += range.second - range.first; + for (const auto& [first, second] : active_ranges) { + total_num_scalars += second - first; } // Compute "active" percentage of polynomial; resort to standard commit if appropriate - size_t percentage_nonzero = total_num_scalars * 100 / polynomial.size(); + size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size(); + size_t percentage_nonzero = total_num_scalars * 100 / polynomial_size; if (percentage_nonzero > NONZERO_THRESHOLD) { return commit(polynomial); } @@ -259,9 +262,10 @@ template class CommitmentKey { * @return Commitment */ Commitment commit_structured_with_nonzero_complement(PolynomialSpan polynomial, - const std::vector>& active_ranges) + const std::vector>& active_ranges, + size_t final_active_wire_idx = 0) { - BB_OP_COUNT_TIME(); + PROFILE_THIS_NAME("commit_structured_with_nonzero_complement"); ASSERT(polynomial.end_index() <= srs->get_monomial_size()); using BatchedAddition = BatchedAffineAddition; @@ -273,20 +277,21 @@ template class CommitmentKey { // Note: the range from the end of the last active range to the end of the polynomial is excluded from the // complement since the polynomial is assumed to be zero there. std::vector> active_ranges_complement; + // Also compute total number of scalars in the constant regions + size_t total_num_complement_scalars = 0; for (size_t i = 0; i < active_ranges.size() - 1; ++i) { const size_t start = active_ranges[i].second; const size_t end = active_ranges[i + 1].first; - active_ranges_complement.emplace_back(start, end); - } - - // Compute the total number of scalars in the constant regions - size_t total_num_complement_scalars = 0; - for (const auto& range : active_ranges_complement) { - total_num_complement_scalars += range.second - range.first; + if (end > start) { + active_ranges_complement.emplace_back(start, end); + total_num_complement_scalars += end - start; + } } + size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size(); // Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate - size_t percentage_constant = total_num_complement_scalars * 100 / polynomial.size(); + size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size; + if (percentage_constant < CONSTANT_THRESHOLD) { return commit(polynomial); } @@ -299,12 +304,11 @@ template class CommitmentKey { // TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by // performing this copy and the subsequent summation as a precomputation prior to constructing the point table. std::vector points; - points.reserve(2 * total_num_complement_scalars); - for (const auto& range : active_ranges_complement) { - const size_t start = 2 * range.first; - const size_t end = 2 * range.second; - for (size_t i = start; i < end; i += 2) { - points.emplace_back(point_table[i]); + + points.reserve(total_num_complement_scalars); + for (const auto& [start, end] : active_ranges_complement) { + for (size_t i = start; i < end; i++) { + points.emplace_back(point_table[2 * i]); } } @@ -313,17 +317,16 @@ template class CommitmentKey { std::vector unique_scalars; std::vector sequence_counts; for (const auto& range : active_ranges_complement) { - if (range.second - range.first > 0) { // only ranges with nonzero length - unique_scalars.emplace_back(polynomial.span[range.first]); - sequence_counts.emplace_back(range.second - range.first); - } + unique_scalars.emplace_back(polynomial.span[range.first]); + sequence_counts.emplace_back(range.second - range.first); } // Reduce each sequence to a single point auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts); // Compute the full commitment as the sum of the "active" region commitment and the constant region contribution - Commitment result = commit_structured(polynomial, active_ranges); + Commitment result = commit_structured(polynomial, active_ranges, final_active_wire_idx); + for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) { result = result + point * scalar; } diff --git a/barretenberg/cpp/src/barretenberg/ecc/batched_affine_addition/batched_affine_addition.cpp b/barretenberg/cpp/src/barretenberg/ecc/batched_affine_addition/batched_affine_addition.cpp index 447e6039c55..058bce37738 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/batched_affine_addition/batched_affine_addition.cpp +++ b/barretenberg/cpp/src/barretenberg/ecc/batched_affine_addition/batched_affine_addition.cpp @@ -10,6 +10,7 @@ template std::vector::G1> BatchedAffineAddition::add_in_place( const std::span& points, const std::vector& sequence_counts) { + PROFILE_THIS_NAME("BatchedAffineAddition::add_in_place"); // Instantiate scratch space for point addition denominators and their calculation std::vector scratch_space_vector(points.size()); std::span scratch_space(scratch_space_vector); diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp index 0ed0c481dec..5a747150eb1 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp @@ -404,9 +404,10 @@ template void field::batch_invert(field* coeffs, const size_t n) no batch_invert(std::span{ coeffs, n }); } +// TODO(https://github.com/AztecProtocol/barretenberg/issues/1166) template void field::batch_invert(std::span coeffs) noexcept { - BB_OP_COUNT_TRACK_NAME("fr::batch_invert"); + PROFILE_THIS_NAME("fr::batch_invert"); const size_t n = coeffs.size(); auto temporaries_ptr = std::static_pointer_cast(get_mem_slab(n * sizeof(field))); diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp index b9230e4675a..e6fe7ef2aa3 100644 --- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp @@ -123,7 +123,7 @@ template class ProvingKey_ { // folded element by element. std::vector public_inputs; - // Ranges of the form [start, end) over which the execution trace is "active" + // Ranges of the form [start, end) where witnesses have non-zero values (hence the execution trace is "active") std::vector> active_block_ranges; ProvingKey_() = default; diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp index 683a20a3d63..91837b267dd 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp @@ -22,6 +22,7 @@ struct ExecutionTraceUsageTracker { MegaTraceFixedBlockSizes fixed_sizes; // fixed size of each block prescribed by structuring // Store active ranges based on the most current accumulator and those based on all but the most recently // accumulated circuit. The former is needed for the combiner calculation and the latter for the perturbator. + // The ranges cover all areas in the trace where relations have nontrivial values. std::vector active_ranges; std::vector previous_active_ranges; diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/library/grand_product_library.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/library/grand_product_library.hpp index 58b1d35630a..ee7eeb3c437 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/library/grand_product_library.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/library/grand_product_library.hpp @@ -56,8 +56,11 @@ namespace bb { template void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials, bb::RelationParameters& relation_parameters, - size_t size_override = 0) + size_t size_override = 0, + std::vector> active_block_ranges = {}) { + PROFILE_THIS_NAME("compute_grand_product"); + using FF = typename Flavor::FF; using Polynomial = typename Flavor::Polynomial; using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>; @@ -84,22 +87,34 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials, Polynomial numerator{ domain_size, domain_size }; Polynomial denominator{ domain_size, domain_size }; + auto check_is_active = [&](size_t idx) { + if (active_block_ranges.empty()) { + return true; + } + return std::any_of(active_block_ranges.begin(), active_block_ranges.end(), [idx](const auto& range) { + return idx >= range.first && idx < range.second; + }); + }; + // Step (1) // Populate `numerator` and `denominator` with the algebra described by Relation + FF gamma_fourth = relation_parameters.gamma.pow(4); parallel_for(num_threads, [&](size_t thread_idx) { - typename Flavor::AllValues evaluations; - // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): construction of evaluations is equivalent to - // calling get_row which creates full copies. avoid? + typename Flavor::AllValues row; const size_t start = idx_bounds[thread_idx].first; const size_t end = idx_bounds[thread_idx].second; for (size_t i = start; i < end; ++i) { - for (auto [eval, full_poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) { - eval = full_poly.size() > i ? full_poly[i] : 0; + if (check_is_active(i)) { + // TODO(https://github.com/AztecProtocol/barretenberg/issues/940):consider avoiding get_row if possible. + row = full_polynomials.get_row(i); + numerator.at(i) = + GrandProdRelation::template compute_grand_product_numerator(row, relation_parameters); + denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator( + row, relation_parameters); + } else { + numerator.at(i) = gamma_fourth; + denominator.at(i) = gamma_fourth; } - numerator.at(i) = GrandProdRelation::template compute_grand_product_numerator( - evaluations, relation_parameters); - denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator( - evaluations, relation_parameters); } }); @@ -163,6 +178,7 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials, auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials); // We have a 'virtual' 0 at the start (as this is a to-be-shifted polynomial) ASSERT(grand_product_polynomial.start_index() == 1); + parallel_for(num_threads, [&](size_t thread_idx) { const size_t start = idx_bounds[thread_idx].first; const size_t end = idx_bounds[thread_idx].second; diff --git a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp index 3b00b5bff69..d51062f991f 100644 --- a/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/databus_lookup_relation.hpp @@ -231,6 +231,7 @@ template class DatabusLookupRelationImpl { auto& relation_parameters, const size_t circuit_size) { + PROFILE_THIS_NAME("Databus::compute_logderivative_inverse"); auto& inverse_polynomial = BusData::inverses(polynomials); size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread diff --git a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp index 686b751903c..dbd45cc2ed5 100644 --- a/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp +++ b/barretenberg/cpp/src/barretenberg/relations/logderiv_lookup_relation.hpp @@ -156,6 +156,7 @@ template class LogDerivLookupRelationImpl { auto& relation_parameters, const size_t circuit_size) { + PROFILE_THIS_NAME("Lookup::compute_logderivative_inverse"); auto& inverse_polynomial = get_inverse_polynomial(polynomials); size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp index f4129c363a3..24919feb257 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp @@ -492,6 +492,8 @@ class MegaFlavor { */ void compute_logderivative_inverses(const RelationParameters& relation_parameters) { + PROFILE_THIS_NAME("compute_logderivative_inverses"); + // Compute inverses for conventional lookups LogDerivLookupRelation::compute_logderivative_inverse( this->polynomials, relation_parameters, this->circuit_size); @@ -525,7 +527,7 @@ class MegaFlavor { // Compute permutation grand product polynomial compute_grand_product>( - this->polynomials, relation_parameters, size_override); + this->polynomials, relation_parameters, size_override, this->active_block_ranges); } uint64_t estimate_memory() diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp index 6ff45fb338d..f34d0b48733 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp @@ -328,7 +328,7 @@ class UltraFlavor { [[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); } [[nodiscard]] AllValues get_row(const size_t row_idx) const { - PROFILE_THIS(); + PROFILE_THIS_NAME("UltraFlavor::get_row"); AllValues result; for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) { result_field = polynomial[row_idx]; diff --git a/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp b/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp index 549d42564fd..39eccd6ef01 100644 --- a/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp +++ b/barretenberg/cpp/src/barretenberg/trace_to_polynomials/trace_to_polynomials.cpp @@ -150,6 +150,7 @@ typename TraceToPolynomials::TraceData TraceToPolynomials::const // otherwise, the next block starts immediately following the previous one offset += block.get_fixed_size(is_structured); } + return trace_data; } diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index c19a43f8f98..7377c7b31c7 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -235,6 +235,7 @@ template void OinkProver::execute_grand_product_c { PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round"); // Compute the permutation grand product polynomial + proving_key->proving_key.compute_grand_product_polynomial(proving_key->relation_parameters, proving_key->final_active_wire_idx + 1); @@ -243,7 +244,9 @@ template void OinkProver::execute_grand_product_c if (proving_key->get_is_structured()) { witness_commitments.z_perm = proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement( - proving_key->proving_key.polynomials.z_perm, proving_key->proving_key.active_block_ranges); + proving_key->proving_key.polynomials.z_perm, + proving_key->proving_key.active_block_ranges, + proving_key->final_active_wire_idx + 1); } else { witness_commitments.z_perm = proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.z_perm); diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp index 8ab9fed7aa1..e6db742510b 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp @@ -19,6 +19,7 @@ // clang-format on #include +#include "barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp" #include "barretenberg/ultra_honk/decider_proving_key.hpp" namespace bb { @@ -40,16 +41,20 @@ template class OinkProver { std::shared_ptr proving_key; std::shared_ptr transcript; std::string domain_separator; + ExecutionTraceUsageTracker trace_usage_tracker; + typename Flavor::WitnessCommitments witness_commitments; typename Flavor::CommitmentLabels commitment_labels; using RelationSeparator = typename Flavor::RelationSeparator; OinkProver(std::shared_ptr proving_key, const std::shared_ptr& transcript = std::make_shared(), - std::string domain_separator = "") + std::string domain_separator = "", + const ExecutionTraceUsageTracker& trace_usage_tracker = ExecutionTraceUsageTracker{}) : proving_key(proving_key) , transcript(transcript) , domain_separator(std::move(domain_separator)) + , trace_usage_tracker(trace_usage_tracker) {} void prove();