diff --git a/barretenberg/cpp/scripts/analyze_client_ivc_bench.py b/barretenberg/cpp/scripts/analyze_client_ivc_bench.py index 600854679c5..28213fc16d9 100755 --- a/barretenberg/cpp/scripts/analyze_client_ivc_bench.py +++ b/barretenberg/cpp/scripts/analyze_client_ivc_bench.py @@ -16,13 +16,17 @@ # Single out an independent set of functions accounting for most of BENCHMARK's real_time to_keep = [ "construct_circuits(t)", - "DeciderProvingKey(Circuit&)(t)", + # "DeciderProvingKey(Circuit&)(t)", "ProtogalaxyProver::prove(t)", + "initialise coefficient tree level(t)", + "ProtogalaxyProver_::compute_row_evaluations(t)", + "ProtogalaxyProver_::construct_perturbator_coefficients(t)", + "ProtogalaxyProver_::construct_coefficients_tree(t)", "Decider::construct_proof(t)", "ECCVMProver(CircuitBuilder&)(t)", "ECCVMProver::construct_proof(t)", "TranslatorProver::construct_proof(t)", - "Goblin::merge(t)" + # "Goblin::merge(t)" ] with open(PREFIX / IVC_BENCH_JSON, "r") as read_file: diff --git a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp index 6033648972c..7aa1b8910d8 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp @@ -33,7 +33,7 @@ class ClientIVCBench : public benchmark::Fixture { */ BENCHMARK_DEFINE_F(ClientIVCBench, Full)(benchmark::State& state) { - ClientIVC ivc{ { CLIENT_IVC_BENCH_STRUCTURE } }; + ClientIVC ivc{ { EXAMPLE_20 } }; auto total_num_circuits = 2 * static_cast(state.range(0)); // 2x accounts for kernel circuits auto mocked_vkeys = mock_verification_keys(total_num_circuits); diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp index efad2d0e072..7041ea8cd38 100644 --- 
a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp @@ -161,6 +161,25 @@ template class ProtogalaxyProverInternal { return aggregated_relation_evaluations; } + + /** @brief Build one level of the coefficient tree: a container of level_width inner vectors, each resized to degree + 1 entries. The resizes are split into contiguous index ranges, one range per parallel_for task. @param level_width number of inner vectors in the returned level. @param degree each inner vector is resized to degree + 1 entries. @return the populated level_coeffs container. NOTE(review): entries hold whatever resize() value-initialises them to, presumably zero for the field type; confirm, because the tree construction that consumes later levels accumulates into them with +=. NOTE(review): the division by num_threads assumes calculate_num_threads never returns 0; confirm. */ static std::vector> initialise_coefficient_tree_level(const size_t level_width, const size_t degree) + { + PROFILE_THIS_NAME("initialise coefficient tree level"); + std::vector> level_coeffs(level_width); + size_t num_threads = calculate_num_threads(level_width); + size_t range_per_thread = level_width / num_threads; + /* remainder of the integer division above; handed to the last task below */ size_t leftovers = level_width - (range_per_thread * num_threads); + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + /* every task covers range_per_thread indices; the last one additionally covers the leftovers */ size_t range = (j == num_threads - 1) ? range_per_thread + leftovers : range_per_thread; + ASSERT(offset < level_width || level_width == 0); + ASSERT((offset + range) <= level_width); + for (size_t idx = offset; idx < offset + range; idx++) { + /* size this node for degree + 1 coefficients */ level_coeffs[idx].resize(degree + 1); + } + }); + return level_coeffs; + } /** * @brief Recursively compute the parent nodes of each level in the tree, starting from the leaves. 
Note that at * each level, the resulting parent nodes will be polynomials of degree (level+1) because we multiply by an @@ -171,24 +190,28 @@ template class ProtogalaxyProverInternal { const std::vector>& prev_level_coeffs, size_t level = 1) { + if (level == betas.size()) { return prev_level_coeffs[0]; } - - auto degree = level + 1; - auto prev_level_width = prev_level_coeffs.size(); - std::vector> level_coeffs(prev_level_width / 2, std::vector(degree + 1, 0)); - parallel_for_heuristic( - prev_level_width / 2, - [&](size_t parent) { - size_t node = parent * 2; - std::copy(prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin()); - for (size_t d = 0; d < degree; d++) { - level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level]; - level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level]; - } - }, - /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3); + const size_t degree = level + 1; + const size_t level_width = prev_level_coeffs.size() / 2; + std::vector> level_coeffs = initialise_coefficient_tree_level(level_width, degree); + { + PROFILE_THIS_NAME("other coefficients tree computation"); + parallel_for_heuristic( + level_width, + [&](size_t parent) { + size_t node = parent * 2; + std::copy( + prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin()); + for (size_t d = 0; d < degree; d++) { + level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level]; + level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level]; + } + }, + /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3); + } return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1); } @@ -206,17 +229,36 @@ template class ProtogalaxyProverInternal { std::span deltas, const std::vector& full_honk_evaluations) { - auto width = full_honk_evaluations.size(); - std::vector> first_level_coeffs(width / 2, 
std::vector(2, 0)); - parallel_for_heuristic( - width / 2, - [&](size_t parent) { - size_t node = parent * 2; - first_level_coeffs[parent][0] = - full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0]; - first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0]; - }, - /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3); + + const size_t width = full_honk_evaluations.size() / 2; + std::vector> first_level_coeffs = initialise_coefficient_tree_level(width, 1); + // { + // PROFILE_THIS_NAME("first level coefficients allocation"); + // size_t num_threads = calculate_num_threads(width); + // size_t range_per_thread = width / num_threads; + // size_t leftovers = width - (range_per_thread * num_threads); + // parallel_for(num_threads, [&](size_t j) { + // size_t offset = j * range_per_thread; + // size_t range = (j == num_threads - 1) ? range_per_thread + leftovers : range_per_thread; + // ASSERT(offset < width || width == 0); + // ASSERT((offset + range) <= width); + // for (size_t idx = offset; idx < offset + range; idx++) { + // first_level_coeffs[idx].resize(2); + // } + // }); + // } + { + PROFILE_THIS_NAME("perturbator coefficients first level computation"); + parallel_for_heuristic( + width, + [&](size_t parent) { + size_t node = parent * 2; + first_level_coeffs[parent][0] = + full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0]; + first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0]; + }, + /* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3); + } return construct_coefficients_tree(betas, deltas, first_level_coeffs); }