diff --git a/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh b/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh index 71bf3ee5b9..612514c414 100755 --- a/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh +++ b/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_branch_vs_baseline.sh @@ -35,6 +35,7 @@ bin/$BENCH_TARGET --benchmark_format=json > $BRANCH_RESULTS # Checkout baseline branch, run benchmarks, save results in json format echo -e "\nConfiguring and building $BENCH_TARGET in $BASELINE_BRANCH branch..\n" git checkout master > /dev/null +cd $BASE_DIR rm -rf $BUILD_DIR cmake --preset bench > /dev/null && cmake --build --preset bench --target $BENCH_TARGET cd build-bench diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp index a90d422e51..6c81dc30bc 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.hpp @@ -3,6 +3,7 @@ #include #include #include +#include "barretenberg/common/thread.hpp" #include "polynomials/barycentric_data.hpp" #include "polynomials/univariate.hpp" #include "polynomials/pow.hpp" @@ -78,8 +79,6 @@ template class SumcheckRound { RelationUnivariates univariate_accumulators; RelationEvaluations relation_evaluations; - ExtendedEdges extended_edges; - // TODO(#224)(Cody): this should go away BarycentricData barycentric_2_to_max = BarycentricData(); @@ -120,7 +119,7 @@ template class SumcheckRound { * In practice, multivariates is one of ProverPolynomials or FoldedPolynomials. 
* */ - void extend_edges(auto& multivariates, size_t edge_idx) + void extend_edges(auto& extended_edges, auto& multivariates, size_t edge_idx) { size_t univariate_idx = 0; // TODO(#391) zip for (auto& poly : multivariates) { @@ -140,20 +139,63 @@ template class SumcheckRound { const PowUnivariate& pow_univariate, const FF alpha) { - // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} - // This means that each univariate for each relation needs an extra multiplication. - FF pow_challenge = pow_univariate.partial_evaluation_constant; - for (size_t edge_idx = 0; edge_idx < round_size; edge_idx += 2) { - extend_edges(polynomials, edge_idx); - - // Compute the i-th edge's univariate contribution, - // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" - // and add it to the accumulators for Sˡ(Xₗ) - accumulate_relation_univariates<>(relation_parameters, pow_challenge); - // Update the pow polynomial's contribution c_l ⋅ ζ_{l+1}ⁱ for the next edge. - pow_challenge *= pow_univariate.zeta_pow_sqr; + // Precompute the vector of required powers of zeta. NOTE(review): element 0 is written unconditionally below, so this assumes round_size >= 2 - confirm callers guarantee it. + // TODO(luke): Parallelize this + std::vector pow_challenges(round_size >> 1); + pow_challenges[0] = pow_univariate.partial_evaluation_constant; + for (size_t i = 1; i < (round_size >> 1); ++i) { + pow_challenges[i] = pow_challenges[i - 1] * pow_univariate.zeta_pow_sqr; + } + + // Determine number of threads for multithreading. + // Note: Multithreading is "on" for every round but we reduce the number of threads from the max available based + // on a specified minimum number of iterations per thread. This eventually leads to the use of a single thread. + // For now we use a power of 2 number of threads simply to ensure the round size is evenly divided. 
+ size_t max_num_threads = get_num_cpus_pow2(); // number of available threads (power of 2) + size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread + size_t desired_num_threads = round_size / min_iterations_per_thread; + size_t num_threads = std::min(desired_num_threads, max_num_threads); // fewer than max if justified + num_threads = num_threads > 0 ? num_threads : 1; // ensure num threads is >= 1 + size_t iterations_per_thread = round_size / num_threads; // actual iterations per thread + + // Construct univariate accumulator containers; one per thread + std::vector thread_univariate_accumulators(num_threads); + for (auto& accum : thread_univariate_accumulators) { + zero_univariates(accum); } + // Construct extended edge containers; one per thread + std::vector> extended_edges; + extended_edges.resize(num_threads); + + // Accumulate the contribution from each sub-relation across each edge of the hyper-cube + parallel_for(num_threads, [&](size_t thread_idx) { + size_t start = thread_idx * iterations_per_thread; + size_t end = (thread_idx + 1) * iterations_per_thread; + + // For each edge_idx = 2i, we need to multiply the whole contribution by zeta^{2^{2i}} + // This means that each univariate for each relation needs an extra multiplication. + for (size_t edge_idx = start; edge_idx < end; edge_idx += 2) { + extend_edges(extended_edges[thread_idx], polynomials, edge_idx); + + // Look up the precomputed pow polynomial contribution c_l ⋅ ζ_{l+1}ⁱ for this edge. 
+ FF pow_challenge = pow_challenges[edge_idx >> 1]; + + // Compute the i-th edge's univariate contribution, + // scale it by the pow polynomial's constant and zeta power "c_l ⋅ ζ_{l+1}ⁱ" + // and add it to the accumulators for Sˡ(Xₗ) + accumulate_relation_univariates<>(thread_univariate_accumulators[thread_idx], + extended_edges[thread_idx], + relation_parameters, + pow_challenge); + } + }); + + // Accumulate the per-thread univariate accumulators into a single set of accumulators + for (auto& accumulators : thread_univariate_accumulators) { + add_nested_tuples(univariate_accumulators, accumulators); + } + // Batch the univariate contributions from each sub-relation to obtain the round univariate return batch_over_relations(alpha); } @@ -238,14 +280,18 @@ template class SumcheckRound { * appropriate scaling factors, produces S_l. */ template - void accumulate_relation_univariates(const RelationParameters& relation_parameters, const FF& scaling_factor) + void accumulate_relation_univariates(RelationUnivariates& univariate_accumulators, + const auto& extended_edges, + const RelationParameters& relation_parameters, + const FF& scaling_factor) { std::get(relations).add_edge_contribution( std::get(univariate_accumulators), extended_edges, relation_parameters, scaling_factor); // Repeat for the next relation. if constexpr (relation_idx + 1 < NUM_RELATIONS) { - accumulate_relation_univariates(relation_parameters, scaling_factor); + accumulate_relation_univariates( + univariate_accumulators, extended_edges, relation_parameters, scaling_factor); } } @@ -273,6 +319,9 @@ template class SumcheckRound { } public: + // TODO(luke): Potentially make RelationUnivariates (tuple of tuples of Univariates) a class and make these utility + // functions class methods. Alternatively, move all of these tuple utilities (and the ones living elsewhere) to + // their own module. 
/** * Utility methods for tuple of tuples of Univariates */ @@ -401,5 +450,41 @@ template class SumcheckRound { apply_to_tuple_of_arrays(operation, tuple); } } + + /** + * @brief Componentwise addition of two tuples + * @details Used for adding tuples of Univariates but in general works for any object for which += is + * defined. The result is stored in the first tuple. + * + * @tparam T Type of the elements contained in the tuples + * @param tuple_1 First summand. Result stored in this tuple + * @param tuple_2 Second summand + */ + template + static constexpr void add_tuples(std::tuple& tuple_1, const std::tuple& tuple_2) + { + [&](std::index_sequence) { ((std::get(tuple_1) += std::get(tuple_2)), ...); } + (std::make_index_sequence{}); + } + + /** + * @brief Componentwise addition of nested tuples (tuples of tuples) + * @details Used for summing tuples of tuples of Univariates. Needed for Sumcheck multithreading. Each thread + * accumulates relation contributions across a portion of the hypercube and then the results are accumulated into a + * single nested tuple. + * + * @tparam Tuple + * @tparam Index Index into outer tuple + * @param tuple_1 First nested tuple summand. 
Result stored here + * @param tuple_2 Second summand + */ + template + static constexpr void add_nested_tuples(Tuple& tuple_1, const Tuple& tuple_2) + { + if constexpr (Index < std::tuple_size::value) { + add_tuples(std::get(tuple_1), std::get(tuple_2)); + add_nested_tuples(tuple_1, tuple_2); + } + } }; } // namespace proof_system::honk::sumcheck diff --git a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp index c5e7260be4..30fbbe543e 100644 --- a/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp +++ b/cpp/src/barretenberg/honk/sumcheck/sumcheck_round.test.cpp @@ -36,7 +36,7 @@ static Univariate compute_round_univariate( const RelationParameters& relation_parameters, const FF alpha) { - size_t round_size = 1; + size_t round_size = 2; // Improvement(Cody): This is ugly? Maye supply some/all of this data through "flavor" class? auto round = SumcheckRound(round_size); @@ -361,4 +361,39 @@ TEST(SumcheckRound, TuplesOfEvaluationArrays) EXPECT_EQ(std::get<1>(tuple_of_arrays)[1], 0); } +/** + * @brief Test utility functions for adding two tuples of tuples of Univariates + * + */ +TEST(SumcheckRound, AddTuplesOfTuplesOfUnivariates) +{ + using Flavor = proof_system::honk::flavor::Standard; + using FF = typename Flavor::FF; + + // Define some arbitrary univariates + Univariate univariate_1({ 1, 2 }); + Univariate univariate_2({ 2, 4 }); + Univariate univariate_3({ 3, 4, 5 }); + + Univariate univariate_4({ 3, 6 }); + Univariate univariate_5({ 8, 1 }); + Univariate univariate_6({ 3, 7, 1 }); + + Univariate expected_sum_1 = univariate_1 + univariate_4; + Univariate expected_sum_2 = univariate_2 + univariate_5; + Univariate expected_sum_3 = univariate_3 + univariate_6; + + // Construct two tuples of tuples + auto tuple_of_tuples_1 = + std::make_tuple(std::make_tuple(univariate_1), std::make_tuple(univariate_2, univariate_3)); + auto tuple_of_tuples_2 = + 
std::make_tuple(std::make_tuple(univariate_4), std::make_tuple(univariate_5, univariate_6)); + + SumcheckRound::add_nested_tuples(tuple_of_tuples_1, tuple_of_tuples_2); + + EXPECT_EQ(std::get<0>(std::get<0>(tuple_of_tuples_1)), expected_sum_1); + EXPECT_EQ(std::get<0>(std::get<1>(tuple_of_tuples_1)), expected_sum_2); + EXPECT_EQ(std::get<1>(std::get<1>(tuple_of_tuples_1)), expected_sum_3); +} + } // namespace test_sumcheck_round