feat: Relations vs widgets benchmarking (AztecProtocol#3931)

Add code to measure Plonk widget execution vs Honk relation execution. Outputs below.

## Summary of results

Here are the values of widget time/relation time:

```
Arithmetic 0.8342541436464088
GenPermSort 1.0814663951120163
Elliptic 0.8155940594059405
Auxiliary 0.7053435114503817
Plookup 0.7008310249307479
Permutation 1.192233009708738
```

## Benchmark outputs

```
% ./bin/relations_bench 17s ~/barretenberg-cpp/build cg/relation-check-bench + mainframe
2024-01-10T19:27:00+00:00
Running ./bin/relations_bench
Run on (128 X 2649.99 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x64)
  L1 Instruction 32 KiB (x64)
  L2 Unified 512 KiB (x64)
  L3 Unified 32768 KiB (x8)
Load Average: 12.24, 23.55, 50.13
--------------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
--------------------------------------------------------------------------------------------------------------------------------------------
execute_relation<honk::flavor::Ultra, UltraArithmeticRelation<Fr>> 543 ns 543 ns 1289230
execute_relation<honk::flavor::Ultra, GenPermSortRelation<Fr>> 491 ns 491 ns 1423778
execute_relation<honk::flavor::Ultra, EllipticRelation<Fr>> 808 ns 808 ns 866448
execute_relation<honk::flavor::Ultra, AuxiliaryRelation<Fr>> 1965 ns 1965 ns 357614
execute_relation<honk::flavor::Ultra, LookupRelation<Fr>> 722 ns 722 ns 969890
execute_relation<honk::flavor::Ultra, UltraPermutationRelation<Fr>> 515 ns 515 ns 1358778
```

```
% taskset -c 0 ./bin/widget_bench 1m 57s ~/barretenberg-cpp/build cg/relation-check-bench + mainframe
2024-01-10T19:15:11+00:00
Running ./bin/widget_bench
Run on (128 X 2649.99 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x64)
  L1 Instruction 32 KiB (x64)
  L2 Unified 512 KiB (x64)
  L3 Unified 32768 KiB (x8)
Load Average: 25.77, 74.54, 79.34
----------------------------------------------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------------------------------------------------------------------------------
quotient_contribution<ProverPlookupArithmeticWidget<ultra_settings>>/iterations:1/manual_time 453 ns 186172 ns 1
quotient_contribution<ProverGenPermSortWidget<ultra_settings>>/iterations:1/manual_time 531 ns 165501 ns 1
quotient_contribution<ProverEllipticWidget<ultra_settings>>/iterations:1/manual_time 659 ns 174561 ns 1
quotient_contribution<ProverPlookupAuxiliaryWidget<ultra_settings>>/iterations:1/manual_time 1386 ns 206071 ns 1
quotient_contribution<ProverPlookupWidget<4>>/iterations:1/manual_time 506 ns 188280 ns 1
quotient_contribution<ProverPermutationWidget<4, true>>/iterations:1/manual_time 614 ns 167110 ns 1
```
Swoir · Jan 11, 2024 · def7514 · def7514
1 parent cb4454c
commit def7514
Show file tree

Hide file tree

Showing 4 changed files with 104 additions and 139 deletions.
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp
@@ -33,119 +33,27 @@ template <typename Flavor, typename Relation> void execute_relation(::benchmark:
         Relation::accumulate(accumulator, new_value, params, 1);
     }
 }
-
-void ultra_auxiliary_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, AuxiliaryRelation<Fr>>(state);
-}
-BENCHMARK(ultra_auxiliary_relation);
-
-void ultra_elliptic_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, EllipticRelation<Fr>>(state);
-}
-BENCHMARK(ultra_elliptic_relation);
-
-void ultra_ecc_op_queue_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinUltra, EccOpQueueRelation<Fr>>(state);
-}
-BENCHMARK(ultra_ecc_op_queue_relation);
-
-void ultra_gen_perm_sort_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, GenPermSortRelation<Fr>>(state);
-}
-BENCHMARK(ultra_gen_perm_sort_relation);
-
-void ultralookup_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, LookupRelation<Fr>>(state);
-}
-BENCHMARK(ultralookup_relation);
-
-void ultra_permutation_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, UltraPermutationRelation<Fr>>(state);
-}
-BENCHMARK(ultra_permutation_relation);
-
-void ultra_arithmetic_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::Ultra, UltraArithmeticRelation<Fr>>(state);
-}
-BENCHMARK(ultra_arithmetic_relation);
-
-void translator_decomposition_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorDecompositionRelation<Fr>>(state);
-}
-BENCHMARK(translator_decomposition_relation);
-
-void translator_opcode_constraint_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorOpcodeConstraintRelation<Fr>>(state);
-}
-BENCHMARK(translator_opcode_constraint_relation);
-
-void translator_accumulator_transfer_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorAccumulatorTransferRelation<Fr>>(state);
-}
-BENCHMARK(translator_accumulator_transfer_relation);
-
-void translator_gen_perm_sort_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorGenPermSortRelation<Fr>>(state);
-}
-BENCHMARK(translator_gen_perm_sort_relation);
-
-void translator_non_native_field_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorNonNativeFieldRelation<Fr>>(state);
-}
-BENCHMARK(translator_non_native_field_relation);
-
-void translator_permutation_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorPermutationRelation<Fr>>(state);
-}
-BENCHMARK(translator_permutation_relation);
-
-void eccvm_lookup_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMLookupRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_lookup_relation);
-
-void eccvm_msm_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMMSMRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_msm_relation);
-
-void eccvm_point_table_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMPointTableRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_point_table_relation);
-
-void eccvm_set_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMSetRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_set_relation);
-
-void eccvm_transcript_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMTranscriptRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_transcript_relation);
-
-void eccvm_wnaf_relation(::benchmark::State& state) noexcept
-{
-    execute_relation<honk::flavor::ECCVM, ECCVMWnafRelation<Fq>>(state);
-}
-BENCHMARK(eccvm_wnaf_relation);
+BENCHMARK(execute_relation<honk::flavor::Ultra, UltraArithmeticRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::Ultra, GenPermSortRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::Ultra, EllipticRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::Ultra, AuxiliaryRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::Ultra, LookupRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::Ultra, UltraPermutationRelation<Fr>>);
+
+BENCHMARK(execute_relation<honk::flavor::GoblinUltra, EccOpQueueRelation<Fr>>);
+
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorDecompositionRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorOpcodeConstraintRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorAccumulatorTransferRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorGenPermSortRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorNonNativeFieldRelation<Fr>>);
+BENCHMARK(execute_relation<honk::flavor::GoblinTranslator, GoblinTranslatorPermutationRelation<Fr>>);
+
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMLookupRelation<Fq>>);
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMMSMRelation<Fq>>);
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMPointTableRelation<Fq>>);
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMSetRelation<Fq>>);
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMTranscriptRelation<Fq>>);
+BENCHMARK(execute_relation<honk::flavor::ECCVM, ECCVMWnafRelation<Fq>>);
 
 } // namespace proof_system::benchmark::relations
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp
@@ -9,7 +9,15 @@ using namespace benchmark;
 using namespace proof_system;
 
 // The rounds to measure
-enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH };
+enum {
+    PREAMBLE,
+    WIRE_COMMITMENTS,
+    SORTED_LIST_ACCUMULATOR,
+    LOG_DERIVATIVE_INVERSE,
+    GRAND_PRODUCT_COMPUTATION,
+    RELATION_CHECK,
+    ZEROMORPH
+};
 
 /**
  * @details Benchmark ultrahonk by performing all the rounds, but only measuring one.
@@ -34,6 +42,7 @@ BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prov
     time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); });
     time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); });
     time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); });
+    time_if_index(LOG_DERIVATIVE_INVERSE, [&] { prover.execute_log_derivative_inverse_round(); });
     time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); });
     time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
     time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
@@ -65,6 +74,7 @@ BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
 ROUND_BENCHMARK(PREAMBLE)->Iterations(1);
 ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
 ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
+ROUND_BENCHMARK(LOG_DERIVATIVE_INVERSE)->Iterations(1);
 ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
 ROUND_BENCHMARK(RELATION_CHECK);
 ROUND_BENCHMARK(ZEROMORPH);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp
@@ -71,9 +71,9 @@ BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
 // Fast rounds take a long time to benchmark because of how we compute statistical significance.
 // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
 ROUND_BENCHMARK(PREAMBLE)->Iterations(1);
-ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS);
-ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA);
-ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA);
-ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT);
-ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUTION);
-ROUND_BENCHMARK(SIXTH_BATCH_OPEN);
+ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS)->Iterations(1);
+ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA)->Iterations(1);
+ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA)->Iterations(1);
+ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT)->Iterations(1);
+ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUTION)->Iterations(1);
+ROUND_BENCHMARK(SIXTH_BATCH_OPEN)->Iterations(1);
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp
@@ -3,15 +3,32 @@
 #include "barretenberg/flavor/ultra.hpp"
 #include "barretenberg/plonk/composer/standard_composer.hpp"
 #include "barretenberg/plonk/composer/ultra_composer.hpp"
+#include "barretenberg/plonk/proof_system/widgets/random_widgets/permutation_widget.hpp"
+#include "barretenberg/plonk/proof_system/widgets/random_widgets/plookup_widget.hpp"
+#include "barretenberg/plonk/proof_system/widgets/transition_widgets/elliptic_widget.hpp"
+#include "barretenberg/plonk/proof_system/widgets/transition_widgets/genperm_sort_widget.hpp"
+#include "barretenberg/plonk/proof_system/widgets/transition_widgets/plookup_arithmetic_widget.hpp"
 #include "barretenberg/plonk/proof_system/widgets/transition_widgets/plookup_auxiliary_widget.hpp"
-#include <benchmark/benchmark.h>
+
+// The widgets are implemented in a non-uniform way where the transition widgets provide a per-row execution function
+// `accumulate_contribution` while the random widgets do not. Defining this preprocessor variable allows us to derive
+// a per-row execution cost that is suitable for comparing against the cost of executing the Honk relations. For
+// validation, we also directly benchmark the available `accumulate_contribution` functions.
+//
+// NOTE: this code is to be run single-threaded via taskset, e.g. taskset -c 0
+// #define GET_PER_ROW_TIME
 
 namespace {
 auto& engine = numeric::random::get_debug_engine();
 }
 
 namespace proof_system::plonk {
 
+#ifdef GET_PER_ROW_TIME
+constexpr size_t LARGE_DOMAIN_SIZE = 4;
+constexpr size_t WIDGET_BENCH_TEST_CIRCUIT_SIZE = 1 << 16;
+#endif
+
 struct BasicPlonkKeyAndTranscript {
     std::shared_ptr<proving_key> key;
     transcript::StandardTranscript transcript;
@@ -22,8 +39,13 @@ BasicPlonkKeyAndTranscript get_plonk_key_and_transcript()
     barretenberg::srs::init_crs_factory("../srs_db/ignition");
     auto inner_composer = plonk::UltraComposer();
     auto builder = typename plonk::UltraComposer::CircuitBuilder();
-    bench_utils::generate_basic_arithmetic_circuit(builder, 80);
+    bench_utils::generate_basic_arithmetic_circuit(builder, 16);
     UltraProver inner_prover = inner_composer.create_prover(builder);
+#ifdef GET_PER_ROW_TIME
+    if (!(inner_prover.key->circuit_size == WIDGET_BENCH_TEST_CIRCUIT_SIZE)) {
+        throw_or_abort("Circit size changed; update value for accurate benchmarks");
+    }
+#endif
     inner_prover.construct_proof();
     return { inner_composer.circuit_proving_key, inner_prover.transcript };
 }
@@ -36,34 +58,59 @@ template <typename Flavor, typename Widget> void execute_widget(::benchmark::Sta
         widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript);
     }
 }
-void plookup_auxiliary_kernel(::benchmark::State& state) noexcept
+
+template <typename Widget> void quotient_contribution(::benchmark::State& state) noexcept
 {
     BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript();
+    Widget widget(data.key.get());
+    for (auto _ : state) {
+#ifdef GET_PER_ROW_TIME
+        auto start = std::chrono::high_resolution_clock::now();
+#endif
+        widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript);
+#ifdef GET_PER_ROW_TIME
+        auto end = std::chrono::high_resolution_clock::now();
+        auto elapsed_seconds = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
+        state.SetIterationTime(elapsed_seconds.count() / (LARGE_DOMAIN_SIZE * WIDGET_BENCH_TEST_CIRCUIT_SIZE));
+#endif
+    }
+}
 
-    using FFTGetter = ProverPlookupAuxiliaryWidget<ultra_settings>::FFTGetter;
-    using FFTKernel = ProverPlookupAuxiliaryWidget<ultra_settings>::FFTKernel;
+#ifdef GET_PER_ROW_TIME
+BENCHMARK(quotient_contribution<ProverPlookupArithmeticWidget<ultra_settings>>)->Iterations(1)->UseManualTime();
+BENCHMARK(quotient_contribution<ProverGenPermSortWidget<ultra_settings>>)->Iterations(1)->UseManualTime();
+BENCHMARK(quotient_contribution<ProverEllipticWidget<ultra_settings>>)->Iterations(1)->UseManualTime();
+BENCHMARK(quotient_contribution<ProverPlookupAuxiliaryWidget<ultra_settings>>)->Iterations(1)->UseManualTime();
+BENCHMARK(quotient_contribution<ProverPlookupWidget<4>>)->Iterations(1)->UseManualTime();
+BENCHMARK(quotient_contribution<ProverPermutationWidget<4, true>>)->Iterations(1)->UseManualTime();
+#else
+BENCHMARK(quotient_contribution<ProverPlookupArithmeticWidget<ultra_settings>>)->Iterations(1);
+BENCHMARK(quotient_contribution<ProverGenPermSortWidget<ultra_settings>>)->Iterations(1);
+BENCHMARK(quotient_contribution<ProverEllipticWidget<ultra_settings>>)->Iterations(1);
+BENCHMARK(quotient_contribution<ProverPlookupAuxiliaryWidget<ultra_settings>>)->Iterations(1);
+BENCHMARK(quotient_contribution<ProverPlookupWidget<4>>)->Iterations(1);
+BENCHMARK(quotient_contribution<ProverPermutationWidget<4, true>>)->Iterations(1);
+#endif
+
+template <typename Widget> void accumulate_contribution(::benchmark::State& state) noexcept
+{
+    BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript();
+
+    using FFTGetter = typename Widget::FFTGetter;
+    using FFTKernel = typename Widget::FFTKernel;
 
     auto polynomials = FFTGetter::get_polynomials(data.key.get(), FFTKernel::get_required_polynomial_ids());
     auto challenges = FFTGetter::get_challenges(
         data.transcript, barretenberg::fr::random_element(), FFTKernel::quotient_required_challenges);
 
     for (auto _ : state) {
-        // NOTE: this simply calls the following 3 functions it does NOT try to replicate ProverPlookupAuxiliaryWidget
-        // logic exactly
         barretenberg::fr result{ 0 };
         FFTKernel::accumulate_contribution(polynomials, challenges, result, 0);
     }
 }
-BENCHMARK(plookup_auxiliary_kernel);
-
-void plookup_auxiliary_widget(::benchmark::State& state) noexcept
-{
-    BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript();
-    ProverPlookupAuxiliaryWidget<ultra_settings> widget(data.key.get());
-    for (auto _ : state) {
-        widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript);
-    }
-}
-BENCHMARK(plookup_auxiliary_widget);
+BENCHMARK(accumulate_contribution<ProverPlookupArithmeticWidget<ultra_settings>>);
+BENCHMARK(accumulate_contribution<ProverGenPermSortWidget<ultra_settings>>);
+BENCHMARK(accumulate_contribution<ProverEllipticWidget<ultra_settings>>);
+BENCHMARK(accumulate_contribution<ProverPlookupAuxiliaryWidget<ultra_settings>>);
 
 } // namespace proof_system::plonk