parallelise coefficient tree level structure allocation
maramihali committed Nov 29, 2024
1 parent 5c32747 commit 8903c06
Showing 3 changed files with 75 additions and 29 deletions.
8 changes: 6 additions & 2 deletions barretenberg/cpp/scripts/analyze_client_ivc_bench.py
@@ -16,13 +16,17 @@
# Single out an independent set of functions accounting for most of BENCHMARK's real_time
to_keep = [
"construct_circuits(t)",
"DeciderProvingKey(Circuit&)(t)",
# "DeciderProvingKey(Circuit&)(t)",
"ProtogalaxyProver::prove(t)",
"initialise coefficient tree level(t)",
"ProtogalaxyProver_::compute_row_evaluations(t)",
"ProtogalaxyProver_::construct_perturbator_coefficients(t)",
"ProtogalaxyProver_::construct_coefficients_tree(t)",
"Decider::construct_proof(t)",
"ECCVMProver(CircuitBuilder&)(t)",
"ECCVMProver::construct_proof(t)",
"TranslatorProver::construct_proof(t)",
"Goblin::merge(t)"
# "Goblin::merge(t)"
]

with open(PREFIX / IVC_BENCH_JSON, "r") as read_file:
@@ -33,7 +33,7 @@ class ClientIVCBench : public benchmark::Fixture {
*/
BENCHMARK_DEFINE_F(ClientIVCBench, Full)(benchmark::State& state)
{
ClientIVC ivc{ { CLIENT_IVC_BENCH_STRUCTURE } };
ClientIVC ivc{ { EXAMPLE_20 } };

auto total_num_circuits = 2 * static_cast<size_t>(state.range(0)); // 2x accounts for kernel circuits
auto mocked_vkeys = mock_verification_keys(total_num_circuits);
@@ -161,6 +161,25 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {

return aggregated_relation_evaluations;
}

static std::vector<std::vector<FF>> initialise_coefficient_tree_level(const size_t level_width, const size_t degree)
{
PROFILE_THIS_NAME("initialise coefficient tree level");
std::vector<std::vector<FF>> level_coeffs(level_width);
size_t num_threads = calculate_num_threads(level_width);
size_t range_per_thread = level_width / num_threads;
size_t leftovers = level_width - (range_per_thread * num_threads);
parallel_for(num_threads, [&](size_t j) {
size_t offset = j * range_per_thread;
size_t range = (j == num_threads - 1) ? range_per_thread + leftovers : range_per_thread;
ASSERT(offset < level_width || level_width == 0);
ASSERT((offset + range) <= level_width);
for (size_t idx = offset; idx < offset + range; idx++) {
level_coeffs[idx].resize(degree + 1);
}
});
return level_coeffs;
}
/**
* @brief Recursively compute the parent nodes of each level in the tree, starting from the leaves. Note that at
* each level, the resulting parent nodes will be polynomials of degree (level+1) because we multiply by an
@@ -171,24 +190,28 @@
const std::vector<std::vector<FF>>& prev_level_coeffs,
size_t level = 1)
{

if (level == betas.size()) {
return prev_level_coeffs[0];
}

auto degree = level + 1;
auto prev_level_width = prev_level_coeffs.size();
std::vector<std::vector<FF>> level_coeffs(prev_level_width / 2, std::vector<FF>(degree + 1, 0));
parallel_for_heuristic(
prev_level_width / 2,
[&](size_t parent) {
size_t node = parent * 2;
std::copy(prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
for (size_t d = 0; d < degree; d++) {
level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
}
},
/* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3);
const size_t degree = level + 1;
const size_t level_width = prev_level_coeffs.size() / 2;
std::vector<std::vector<FF>> level_coeffs = initialise_coefficient_tree_level(level_width, degree);
{
PROFILE_THIS_NAME("other coefficients tree computation");
parallel_for_heuristic(
level_width,
[&](size_t parent) {
size_t node = parent * 2;
std::copy(
prev_level_coeffs[node].begin(), prev_level_coeffs[node].end(), level_coeffs[parent].begin());
for (size_t d = 0; d < degree; d++) {
level_coeffs[parent][d] += prev_level_coeffs[node + 1][d] * betas[level];
level_coeffs[parent][d + 1] += prev_level_coeffs[node + 1][d] * deltas[level];
}
},
/* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * degree * 3);
}
return construct_coefficients_tree(betas, deltas, level_coeffs, level + 1);
}

@@ -206,17 +229,36 @@ template <class DeciderProvingKeys_> class ProtogalaxyProverInternal {
std::span<const FF> deltas,
const std::vector<FF>& full_honk_evaluations)
{
auto width = full_honk_evaluations.size();
std::vector<std::vector<FF>> first_level_coeffs(width / 2, std::vector<FF>(2, 0));
parallel_for_heuristic(
width / 2,
[&](size_t parent) {
size_t node = parent * 2;
first_level_coeffs[parent][0] =
full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
},
/* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3);

const size_t width = full_honk_evaluations.size() / 2;
std::vector<std::vector<FF>> first_level_coeffs = initialise_coefficient_tree_level(width, 1);
// {
// PROFILE_THIS_NAME("first level coefficients allocation");
// size_t num_threads = calculate_num_threads(width);
// size_t range_per_thread = width / num_threads;
// size_t leftovers = width - (range_per_thread * num_threads);
// parallel_for(num_threads, [&](size_t j) {
// size_t offset = j * range_per_thread;
// size_t range = (j == num_threads - 1) ? range_per_thread + leftovers : range_per_thread;
// ASSERT(offset < width || width == 0);
// ASSERT((offset + range) <= width);
// for (size_t idx = offset; idx < offset + range; idx++) {
// first_level_coeffs[idx].resize(2);
// }
// });
// }
{
PROFILE_THIS_NAME("perturbator coefficients first level computation");
parallel_for_heuristic(
width,
[&](size_t parent) {
size_t node = parent * 2;
first_level_coeffs[parent][0] =
full_honk_evaluations[node] + full_honk_evaluations[node + 1] * betas[0];
first_level_coeffs[parent][1] = full_honk_evaluations[node + 1] * deltas[0];
},
/* overestimate */ thread_heuristics::FF_MULTIPLICATION_COST * 3);
}
return construct_coefficients_tree(betas, deltas, first_level_coeffs);
}
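For context on what the recursion above computes: each parent node combines its two children as parent(X) = left(X) + (betas[level] + deltas[level] * X) * right(X), starting from pairs of full Honk evaluations at the leaves. The following is a minimal, self-contained sketch of that recurrence on toy integer inputs, with sequential loops in place of `parallel_for_heuristic` and `int64_t` in place of `FF`; the values and the standalone structure are illustrative and not part of the commit.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Toy version of the coefficient tree: each parent combines two children as
// parent(X) = left(X) + (betas[l] + deltas[l] * X) * right(X).
int main()
{
    using Poly = std::vector<int64_t>; // coefficients, lowest degree first

    std::vector<int64_t> evals = { 3, 5, 7, 11 }; // stand-in for full_honk_evaluations
    std::vector<int64_t> betas = { 2, 4 };        // one beta/delta pair per tree level
    std::vector<int64_t> deltas = { 1, 6 };

    // First level: pair up evaluations into degree-1 polynomials.
    std::vector<Poly> level(evals.size() / 2);
    for (size_t p = 0; p < level.size(); ++p) {
        level[p] = { evals[2 * p] + betas[0] * evals[2 * p + 1], deltas[0] * evals[2 * p + 1] };
    }

    // Subsequent levels: combine siblings, raising the degree by one each time.
    for (size_t l = 1; l < betas.size(); ++l) {
        const size_t degree = l + 1;
        std::vector<Poly> next(level.size() / 2, Poly(degree + 1, 0));
        for (size_t p = 0; p < next.size(); ++p) {
            const Poly& left = level[2 * p];
            const Poly& right = level[2 * p + 1];
            std::copy(left.begin(), left.end(), next[p].begin());
            for (size_t d = 0; d < degree; ++d) {
                next[p][d] += right[d] * betas[l];
                next[p][d + 1] += right[d] * deltas[l];
            }
        }
        level = std::move(next);
    }

    for (int64_t c : level[0]) {
        std::cout << c << ' '; // coefficients of the root polynomial
    }
    std::cout << '\n';
}
```

Running this prints `129 223 66`, i.e. 129 + 223X + 66X^2, which matches expanding (13 + 5X) + (4 + 6X)(29 + 11X) by hand.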

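The allocation strategy introduced by `initialise_coefficient_tree_level` (construct the outer vector empty, then resize the inner vectors in contiguous per-thread chunks, with the last thread absorbing the remainder) can be sketched in isolation as follows. This uses `std::thread` in place of the repository's `parallel_for` and `calculate_num_threads`; the function name, default thread count and `uint64_t` element type are assumptions for illustration only.

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <thread>
#include <vector>

// Resize each inner vector in parallel: divide `width` items into contiguous
// chunks, one per thread, and let the last thread pick up the remainder.
std::vector<std::vector<uint64_t>> allocate_level(size_t width, size_t degree, size_t num_threads = 4)
{
    std::vector<std::vector<uint64_t>> level(width);
    if (width == 0) {
        return level;
    }
    num_threads = std::min(num_threads, width);
    const size_t per_thread = width / num_threads;
    const size_t leftovers = width - per_thread * num_threads;

    std::vector<std::thread> threads;
    threads.reserve(num_threads);
    for (size_t j = 0; j < num_threads; ++j) {
        const size_t offset = j * per_thread;
        const size_t range = (j == num_threads - 1) ? per_thread + leftovers : per_thread;
        threads.emplace_back([&level, offset, range, degree] {
            for (size_t idx = offset; idx < offset + range; ++idx) {
                level[idx].resize(degree + 1); // degree+1 coefficients per node
            }
        });
    }
    for (auto& t : threads) {
        t.join();
    }
    return level;
}
```

Resizing distinct inner vectors from different threads is safe here because the outer vector itself is never reallocated, which is what allows the per-level allocation to move off a single thread.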
