From 753cf188a6a2e757085f4bad6d1fcec3f585b25a Mon Sep 17 00:00:00 2001 From: codygunton Date: Tue, 24 Sep 2024 05:21:09 +0000 Subject: [PATCH 01/21] Start; still no luck on mainframe --- barretenberg/cpp/CMakePresets.json | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 643e4590ae2..d8898f91c43 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -120,6 +120,37 @@ "ENABLE_TRACY": "ON" } }, + { + "name": "tracy-time", + "displayName": "Build for tracy time profiling", + "description": "Build for tracy time profiling", + "binaryDir": "build-tracy-time", + "inherits": "clang16", + "environment": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo", + "CFLAGS": "-g -fno-omit-frame-pointer", + "CXXFLAGS": "-g -fno-omit-frame-pointer", + "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" + }, + "cacheVariables": { + "ENABLE_TRACY": "ON" + } + }, + { + "name": "tracy-default", + "displayName": "Release build with tracy using default clang", + "description": "Release build with tracy using default clang", + "inherits": "default", + "binaryDir": "build-tracy", + "cacheVariables": { + "ENABLE_TRACY": "ON", + "HAVE_STD_REGEX": "ON" + }, + "environment": { + "CXXFLAGS": "-DBB_USE_OP_COUNT -DBB_USE_OP_COUNT_TIME_ONLY" + } + }, + { "name": "tracy-gates", "displayName": "Release build with tracy - but hacked for gate tracking", @@ -476,6 +507,11 @@ "inherits": "default", "configurePreset": "tracy" }, + { + "name": "tracy-time", + "inherits": "default", + "configurePreset": "tracy-time" + }, { "name": "clang16-pic", "inherits": "default", From db93e9b6ec3f52df9eaf3065bfaff0dba80b03ba Mon Sep 17 00:00:00 2001 From: codygunton Date: Wed, 25 Sep 2024 16:05:07 +0000 Subject: [PATCH 02/21] Add local benchmarking script for convenience --- .../cpp/scripts/benchmark_tracy_local.sh | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 barretenberg/cpp/scripts/benchmark_tracy_local.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_local.sh new file mode 100755 index 00000000000..a60ab9c7945 --- /dev/null +++ b/barretenberg/cpp/scripts/benchmark_tracy_local.sh @@ -0,0 +1,31 @@ + +# NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe +# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW +# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler +# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked +# however for any SSH setup, especially an ubuntu one. +# on local machine run: +# export USER=... +# export PRESET=...tracy for memory or tracy-gates for circuit gates... +# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER +set -eux +USER=${1:-$USER} +BOX=$USER-box +BENCHMARK=${2:-protogalaxy_bench} +COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=compute_row_evaluations/17} + +# Can also set PRESET=tracy-gates env variable +PRESET=${PRESET:-tracy} + +cd ~/aztec-packages/barretenberg/cpp/ +cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK +! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy +cd ~/tracy/capture + git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 +sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev +mkdir -p build && cd build && cmake .. && make -j +./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & +sleep 0.1 +cd ~/aztec-packages/barretenberg/cpp/build-$PRESET +ninja $BENCHMARK +sudo $COMMAND \ No newline at end of file From 4602ba6b779e4290b850b3e9c43af0b1c9b74849 Mon Sep 17 00:00:00 2001 From: codygunton Date: Wed, 25 Sep 2024 16:06:12 +0000 Subject: [PATCH 03/21] Update default preset --- barretenberg/cpp/scripts/benchmark_tracy_local.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/scripts/benchmark_tracy_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_local.sh index a60ab9c7945..01fb2265a75 100755 --- a/barretenberg/cpp/scripts/benchmark_tracy_local.sh +++ b/barretenberg/cpp/scripts/benchmark_tracy_local.sh @@ -15,7 +15,7 @@ BENCHMARK=${2:-protogalaxy_bench} COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=compute_row_evaluations/17} # Can also set PRESET=tracy-gates env variable -PRESET=${PRESET:-tracy} +PRESET=${PRESET:-tracy-time} cd ~/aztec-packages/barretenberg/cpp/ cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK From 2736c7470172c2b05a5686019098c4a61dda835d Mon Sep 17 00:00:00 2001 From: codygunton Date: Wed, 25 Sep 2024 16:47:58 +0000 Subject: [PATCH 04/21] Base on clang16-dbg --- barretenberg/cpp/CMakePresets.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index d8898f91c43..9265a961217 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -125,13 +125,13 @@ "displayName": "Build for tracy time profiling", "description": "Build for tracy time profiling", "binaryDir": "build-tracy-time", - "inherits": "clang16", - "environment": { - "CMAKE_BUILD_TYPE": "RelWithDebInfo", - "CFLAGS": "-g -fno-omit-frame-pointer", - "CXXFLAGS": "-g -fno-omit-frame-pointer", - "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" - }, + "inherits": "clang16-dbg", + // "environment": { + // "CMAKE_BUILD_TYPE": "RelWithDebInfo", + // "CFLAGS": "-g -fno-omit-frame-pointer", + // "CXXFLAGS": "-g -fno-omit-frame-pointer", + // "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" + // }, "cacheVariables": { "ENABLE_TRACY": "ON" } From 654f95c3b999e119076fd75f3f7709033433d717 Mon Sep 17 00:00:00 2001 From: codygunton Date: Wed, 25 Sep 2024 17:15:38 +0000 Subject: [PATCH 05/21] Revert "Base on clang16-dbg" This reverts commit 2736c7470172c2b05a5686019098c4a61dda835d. --- barretenberg/cpp/CMakePresets.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 9265a961217..d8898f91c43 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -125,13 +125,13 @@ "displayName": "Build for tracy time profiling", "description": "Build for tracy time profiling", "binaryDir": "build-tracy-time", - "inherits": "clang16-dbg", - // "environment": { - // "CMAKE_BUILD_TYPE": "RelWithDebInfo", - // "CFLAGS": "-g -fno-omit-frame-pointer", - // "CXXFLAGS": "-g -fno-omit-frame-pointer", - // "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" - // }, + "inherits": "clang16", + "environment": { + "CMAKE_BUILD_TYPE": "RelWithDebInfo", + "CFLAGS": "-g -fno-omit-frame-pointer", + "CXXFLAGS": "-g -fno-omit-frame-pointer", + "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" + }, "cacheVariables": { "ENABLE_TRACY": "ON" } From 9bd40ce77659a9e5ac2e3d67fab9cda9f2a86c50 Mon Sep 17 00:00:00 2001 From: codygunton Date: Wed, 25 Sep 2024 17:22:47 +0000 Subject: [PATCH 06/21] Add deps --- barretenberg/cpp/scripts/benchmark_tracy_local.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/scripts/benchmark_tracy_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_local.sh index 01fb2265a75..a31743bc23b 100755 --- a/barretenberg/cpp/scripts/benchmark_tracy_local.sh +++ b/barretenberg/cpp/scripts/benchmark_tracy_local.sh @@ -22,7 +22,7 @@ cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy cd ~/tracy/capture git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 -sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev +sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev libtbb-dev libfreetype-dev mkdir -p build && cd build && cmake .. && make -j ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & sleep 0.1 From c0471d0d2275ae61e12fdf8862ce90b6bfa4c110 Mon Sep 17 00:00:00 2001 From: Cody Date: Wed, 25 Sep 2024 19:08:43 -0400 Subject: [PATCH 07/21] Add and rename scripts --- ...ocal.sh => benchmark_tracy_build_local.sh} | 0 ...hmark_tracy_build_mainframe_view_local.sh} | 0 ...chmark_tracy_build_on_benching_instance.sh | 42 +++++++++++++++++++ 3 files changed, 42 insertions(+) rename barretenberg/cpp/scripts/{benchmark_tracy_local.sh => benchmark_tracy_build_local.sh} (100%) rename barretenberg/cpp/scripts/{benchmark_tracy.sh => benchmark_tracy_build_mainframe_view_local.sh} (100%) create mode 100644 barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_local.sh similarity index 100% rename from barretenberg/cpp/scripts/benchmark_tracy_local.sh rename to barretenberg/cpp/scripts/benchmark_tracy_build_local.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_view_local.sh similarity index 100% rename from barretenberg/cpp/scripts/benchmark_tracy.sh rename to barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_view_local.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh new file mode 100644 index 00000000000..0bbe528a8b5 --- /dev/null +++ b/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh @@ -0,0 +1,42 @@ + +# NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe +# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW +# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler +# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked +# however for any SSH setup, especially an ubuntu one. +# on local machine run: +# export USER=... +# export PRESET=...tracy for memory or tracy-gates for circuit gates... +# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER +set -eux +USER=${1:-$USER} +BOX=$USER-box +BENCHMARK=${2:-protogalaxy_bench} +COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=compute_row_evaluations/17} + +# Can also set PRESET=tracy-gates env variable +PRESET=${PRESET:-tracy} + +ssh $BB_SSH_in " + set -eux ; + cd /mnt/user-data/$USER/aztec-packages/barretenberg/cpp/ ; + cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; + ! [ -d /mnt/user-data/$USER/tracy ] && git clone https://github.com/wolfpld/tracy /mnt/user-data/$USER/tracy ; + cd /mnt/user-data/$USER/tracy/capture ; + git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ; + sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ; + mkdir -p build && cd build && cmake .. && make -j ; + sudo ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & ; + sleep 0.1 ; + cd /mnt/user-data/$USER/aztec-packages/barretenberg/cpp/build-$PRESET ; + ninja $BENCHMARK ; + sudo $COMMAND ; +" & +# wait # TODO(AD) hack - not sure why needed +# ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy +# cd ~/tracy +# git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 +# cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release +# cmake --build profiler/build --parallel +# scp $BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK . +# ~/tracy/profiler/build/tracy-profiler trace-$BENCHMARK From 61299e9375be7c7fc3b2a8d437464c7a1fb999ff Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 26 Sep 2024 05:00:57 +0000 Subject: [PATCH 08/21] mainframe-side changes for remote build local execution --- .../client_ivc_bench/client_ivc.bench.cpp | 2 +- .../protogalaxy_bench/protogalaxy.bench.cpp | 2 +- .../benchmark/ultra_bench/mock_circuits.hpp | 2 +- .../execution_trace/execution_trace.cpp | 14 +++++++------- .../execution_trace/execution_trace.hpp | 6 +++--- .../cpp/src/barretenberg/flavor/flavor.hpp | 2 +- .../cpp/src/barretenberg/goblin/goblin.hpp | 16 ++++++++-------- .../composer/permutation_lib.hpp | 6 +++--- .../protogalaxy/protogalaxy_prover_impl.hpp | 4 ++-- .../srs/factories/file_crs_factory.hpp | 2 +- .../stdlib_circuit_builders/ultra_flavor.hpp | 4 ++-- .../cpp/src/barretenberg/sumcheck/sumcheck.hpp | 4 ++-- .../src/barretenberg/sumcheck/sumcheck_round.hpp | 4 ++-- .../barretenberg/ultra_honk/decider_prover.cpp | 2 +- .../ultra_honk/decider_proving_key.hpp | 4 ++-- .../src/barretenberg/ultra_honk/oink_prover.cpp | 10 +++++----- 16 files changed, 42 insertions(+), 42 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp index e891e9e23ce..ef198ebd595 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp @@ -101,7 +101,7 @@ BENCHMARK_DEFINE_F(ClientIVCBench, Full)(benchmark::State& state) verify_ivc(proof, ivc); } -#define ARGS Arg(ClientIVCBench::NUM_ITERATIONS_MEDIUM_COMPLEXITY) +#define ARGS Arg(ClientIVCBench::NUM_ITERATIONS_MEDIUM_COMPLEXITY)->Arg(1) BENCHMARK_REGISTER_F(ClientIVCBench, Full)->Unit(benchmark::kMillisecond)->ARGS; diff --git a/barretenberg/cpp/src/barretenberg/benchmark/protogalaxy_bench/protogalaxy.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/protogalaxy_bench/protogalaxy.bench.cpp index 105bbb564ce..864f8d7f383 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/protogalaxy_bench/protogalaxy.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/protogalaxy_bench/protogalaxy.bench.cpp @@ -75,7 +75,7 @@ void fold_k(State& state) noexcept } } -BENCHMARK(vector_of_evaluations)->DenseRange(15, 21)->Unit(kMillisecond); +BENCHMARK(vector_of_evaluations)->DenseRange(15, 21)->Unit(kMillisecond)->Iterations(1); BENCHMARK(compute_row_evaluations)->DenseRange(15, 21)->Unit(kMillisecond); // We stick to just k=1 for compile-time reasons. BENCHMARK(fold_k)->/* vary the circuit size */ DenseRange(14, 20)->Unit(kMillisecond); diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp index 27e53e13886..31c8aa46bd2 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp @@ -54,7 +54,7 @@ Prover get_prover(void (*test_circuit_function)(typename Prover::Flavor::Circuit Composer composer; return composer.create_prover(builder); } else { - ZoneScopedN("creating prover"); + // ZoneScopedN("creating prover"); return Prover(builder); } }; diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp index 70146181fcc..975847057a8 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp @@ -9,7 +9,7 @@ namespace bb { template void ExecutionTrace_::populate(Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { - ZoneScopedN("trace populate"); + // ZoneScopedN("trace populate"); // Share wire polynomials, selector polynomials between proving key and builder and copy cycles from raw circuit // data auto trace_data = construct_trace_data(builder, proving_key, is_structured); @@ -18,18 +18,18 @@ void ExecutionTrace_::populate(Builder& builder, typename Flavor::Provin proving_key.pub_inputs_offset = trace_data.pub_inputs_offset; } if constexpr (IsUltraPlonkOrHonk) { - ZoneScopedN("add_memory_records_to_proving_key"); + // ZoneScopedN("add_memory_records_to_proving_key"); add_memory_records_to_proving_key(trace_data, builder, proving_key); } if constexpr (IsGoblinFlavor) { - ZoneScopedN("add_ecc_op_wires_to_proving_key"); + // ZoneScopedN("add_ecc_op_wires_to_proving_key"); add_ecc_op_wires_to_proving_key(builder, proving_key); } // Compute the permutation argument polynomials (sigma/id) and add them to proving key { - ZoneScopedN("compute_permutation_argument_polynomials"); + // ZoneScopedN("compute_permutation_argument_polynomials"); compute_permutation_argument_polynomials(builder, &proving_key, trace_data.copy_cycles); } } @@ -55,7 +55,7 @@ template typename ExecutionTrace_::TraceData ExecutionTrace_::construct_trace_data( Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { - ZoneScopedN("construct_trace_data"); + // ZoneScopedN("construct_trace_data"); TraceData trace_data{ builder, proving_key }; // Complete the public inputs execution trace block from builder.public_inputs @@ -70,7 +70,7 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t // Update wire polynomials and copy cycles // NB: The order of row/column loops is arbitrary but needs to be row/column to match old copy_cycle code { - ZoneScopedN("populating wires and copy_cycles"); + // ZoneScopedN("populating wires and copy_cycles"); for (uint32_t block_row_idx = 0; block_row_idx < block_size; ++block_row_idx) { for (uint32_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { uint32_t var_idx = block.wires[wire_idx][block_row_idx]; // an index into the variables array @@ -113,7 +113,7 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t template void ExecutionTrace_::populate_public_inputs_block(Builder& builder) { - ZoneScopedN("populate_public_inputs_block"); + // ZoneScopedN("populate_public_inputs_block"); // Update the public inputs block for (auto& idx : builder.public_inputs) { for (size_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp index 502ab6d2689..782abdb10e4 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp @@ -28,7 +28,7 @@ template class ExecutionTrace_ { TraceData(Builder& builder, ProvingKey& proving_key) { - ZoneScopedN("TraceData constructor"); + // ZoneScopedN("TraceData constructor"); if constexpr (IsHonkFlavor) { // Initialize and share the wire and selector polynomials for (auto [wire, other_wire] : zip_view(wires, proving_key.polynomials.get_wires())) { @@ -46,7 +46,7 @@ template class ExecutionTrace_ { proving_key.polynomial_store.put(wire_tag, wires[idx].share()); } { - ZoneScopedN("selector initialization"); + // ZoneScopedN("selector initialization"); for (size_t idx = 0; idx < Builder::Arithmetization::NUM_SELECTORS; ++idx) { selectors[idx] = Polynomial(proving_key.circuit_size); std::string selector_tag = builder.selector_names[idx] + "_lagrange"; @@ -55,7 +55,7 @@ template class ExecutionTrace_ { } } { - ZoneScopedN("copy cycle initialization"); + // ZoneScopedN("copy cycle initialization"); copy_cycles.resize(builder.variables.size()); } } diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp index e9cb9dbb63a..cae9d637a60 100644 --- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp @@ -129,7 +129,7 @@ template class ProvingKey_ { std::shared_ptr commitment_key = nullptr) { if (commitment_key == nullptr) { - ZoneScopedN("init commitment key"); + // ZoneScopedN("init commitment key"); this->commitment_key = std::make_shared(circuit_size); } else { // Don't create another commitment key if we already have one diff --git a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp index 134db00a929..594f04a6fdf 100644 --- a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp +++ b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp @@ -171,17 +171,17 @@ class GoblinProver { void prove_eccvm() { { - ZoneScopedN("Create ECCVMBuilder and ECCVMProver"); + // ZoneScopedN("Create ECCVMBuilder and ECCVMProver"); auto eccvm_builder = std::make_unique(op_queue); eccvm_prover = std::make_unique(*eccvm_builder); } { - ZoneScopedN("Construct ECCVM Proof"); + // ZoneScopedN("Construct ECCVM Proof"); goblin_proof.eccvm_proof = eccvm_prover->construct_proof(); } { - ZoneScopedN("Assign Translation Evaluations"); + // ZoneScopedN("Assign Translation Evaluations"); goblin_proof.translation_evaluations = eccvm_prover->translation_evaluations; } } @@ -198,14 +198,14 @@ class GoblinProver { eccvm_key = eccvm_prover->key; eccvm_prover = nullptr; { - ZoneScopedN("Create TranslatorBuilder and TranslatorProver"); + // ZoneScopedN("Create TranslatorBuilder and TranslatorProver"); auto translator_builder = std::make_unique(translation_batching_challenge_v, evaluation_challenge_x, op_queue); translator_prover = std::make_unique(*translator_builder, transcript); } { - ZoneScopedN("Construct Translator Proof"); + // ZoneScopedN("Construct Translator Proof"); goblin_proof.translator_proof = translator_prover->construct_proof(); } } @@ -219,14 +219,14 @@ class GoblinProver { */ GoblinProof prove(MergeProof merge_proof_in = {}) { - ZoneScopedN("Goblin::prove"); + // ZoneScopedN("Goblin::prove"); goblin_proof.merge_proof = merge_proof_in.empty() ? std::move(merge_proof) : std::move(merge_proof_in); { - ZoneScopedN("prove_eccvm"); + // ZoneScopedN("prove_eccvm"); prove_eccvm(); } { - ZoneScopedN("prove_translator"); + // ZoneScopedN("prove_translator"); prove_translator(); } return goblin_proof; diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp index 1590a4d5763..47739f215be 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp @@ -62,7 +62,7 @@ template struct PermutationMapping { */ PermutationMapping(size_t circuit_size) { - ZoneScopedN("PermutationMapping constructor"); + // ZoneScopedN("PermutationMapping constructor"); for (uint8_t col_idx = 0; col_idx < NUM_WIRES; ++col_idx) { sigmas[col_idx].reserve(circuit_size); if constexpr (generalized) { @@ -386,12 +386,12 @@ void compute_permutation_argument_polynomials(const typename Flavor::CircuitBuil } else if constexpr (IsUltraFlavor) { // any UltraHonk flavor // Compute Honk-style sigma and ID polynomials from the corresponding mappings { - ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + // ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_sigmas(), mapping.sigmas, key); } { - ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + // ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_ids(), mapping.ids, key); } diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp index 0a376672a86..29ac12fb505 100644 --- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp @@ -11,7 +11,7 @@ template void ProtogalaxyProver_::run_oink_prover_on_one_incomplete_key(std::shared_ptr keys, const std::string& domain_separator) { - ZoneScopedN("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); + // ZoneScopedN("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); OinkProver oink_prover(keys, transcript, domain_separator + '_'); oink_prover.prove(); } @@ -159,7 +159,7 @@ FoldingResult ProtogalaxyProver_ FoldingResult ProtogalaxyProver_::prove() { - ZoneScopedN("ProtogalaxyProver::prove"); + // ZoneScopedN("ProtogalaxyProver::prove"); BB_OP_COUNT_TIME_NAME("ProtogalaxyProver::prove"); // Ensure keys are all of the same size for (size_t idx = 0; idx < DeciderProvingKeys::NUM - 1; ++idx) { diff --git a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp index f3eca37a48f..b28dfe0c056 100644 --- a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp +++ b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp @@ -44,7 +44,7 @@ template class FileProverCrs : public ProverCrs { FileProverCrs(const size_t num_points, std::string const& path) : num_points(num_points) { - ZoneScopedN("FileProverCrs constructor"); + // ZoneScopedN("FileProverCrs constructor"); monomials_ = scalar_multiplication::point_table_alloc(num_points); srs::IO::read_transcript_g1(monomials_.get(), num_points, path); diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp index fe5dbe526c9..0dea61f90a3 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp @@ -296,7 +296,7 @@ class UltraFlavor { ProverPolynomials() = default; ProverPolynomials(size_t circuit_size) { - ZoneScopedN("creating empty prover polys"); + // ZoneScopedN("creating empty prover polys"); for (auto& poly : get_to_be_shifted()) { poly = Polynomial{ /*memory size*/ circuit_size - 1, /*largest possible index*/ circuit_size, @@ -562,7 +562,7 @@ class UltraFlavor { PartiallyEvaluatedMultivariates() = default; PartiallyEvaluatedMultivariates(const size_t circuit_size) { - ZoneScopedN("PartiallyEvaluatedMultivariates constructor"); + // ZoneScopedN("PartiallyEvaluatedMultivariates constructor"); // Storage is only needed after the first partial evaluation, hence polynomials of // size (n / 2) for (auto& poly : this->get_all()) { diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp index fccab56b551..ba5a3f2a06e 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp @@ -209,7 +209,7 @@ template class SumcheckProver { auto round_univariate = round.compute_univariate( round_idx, full_polynomials, relation_parameters, gate_separators, alpha, zk_sumcheck_data); { - ZoneScopedN("rest of sumcheck round 1"); + // ZoneScopedN("rest of sumcheck round 1"); // Place the evaluations of the round univariate into transcript. transcript->send_to_verifier("Sumcheck:univariate_0", round_univariate); @@ -227,7 +227,7 @@ template class SumcheckProver { // We operate on partially_evaluated_polynomials in place. } for (size_t round_idx = 1; round_idx < multivariate_d; round_idx++) { - ZoneScopedN("sumcheck loop"); + // ZoneScopedN("sumcheck loop"); // Write the round univariate to the transcript round_univariate = round.compute_univariate(round_idx, partially_evaluated_polynomials, diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp index 4e9d11108af..37325072d5f 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp @@ -66,7 +66,7 @@ template class SumcheckProverRound { SumcheckProverRound(size_t initial_round_size) : round_size(initial_round_size) { - ZoneScopedN("SumcheckProverRound constructor"); + // ZoneScopedN("SumcheckProverRound constructor"); // Initialize univariate accumulators to 0 Utils::zero_univariates(univariate_accumulators); } @@ -161,7 +161,7 @@ template class SumcheckProverRound { const RelationSeparator alpha, std::optional> zk_sumcheck_data = std::nullopt) // only submitted when Flavor HasZK { - ZoneScopedN("compute_univariate"); + // ZoneScopedN("compute_univariate"); BB_OP_COUNT_TIME(); // Determine number of threads for multithreading. diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp index 5d8defd5036..13baaf194a0 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp @@ -31,7 +31,7 @@ template void DeciderProver_::execute_relation_ch size_t polynomial_size = proving_key->proving_key.circuit_size; auto sumcheck = Sumcheck(polynomial_size, transcript); { - ZoneScopedN("sumcheck.prove"); + // ZoneScopedN("sumcheck.prove"); sumcheck_output = sumcheck.prove(proving_key->proving_key.polynomials, proving_key->relation_parameters, proving_key->alphas, diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp index fa2e344b5a3..f5a9accd165 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp @@ -68,14 +68,14 @@ template class DeciderProvingKey_ { circuit.op_queue->append_nonzero_ops(); } { - ZoneScopedN("constructing proving key"); + // ZoneScopedN("constructing proving key"); proving_key = ProvingKey(dyadic_circuit_size, circuit.public_inputs.size(), commitment_key); } // Construct and add to proving key the wire, selector and copy constraint polynomials Trace::populate(circuit, proving_key, is_structured); - ZoneScopedN("constructing prover instance after trace populate"); + // ZoneScopedN("constructing prover instance after trace populate"); // If Goblin, construct the databus polynomials if constexpr (IsGoblinFlavor) { diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index 26e0f1cca5a..38987abe796 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -14,29 +14,29 @@ namespace bb { template void OinkProver::prove() { { - ZoneScopedN("execute_preamble_round"); + // ZoneScopedN("execute_preamble_round"); // Add circuit size public input size and public inputs to transcript-> execute_preamble_round(); } { - ZoneScopedN("execute_wire_commitments_round"); + // ZoneScopedN("execute_wire_commitments_round"); // Compute first three wire commitments execute_wire_commitments_round(); } { - ZoneScopedN("execute_sorted_list_accumulator_round"); + // ZoneScopedN("execute_sorted_list_accumulator_round"); // Compute sorted list accumulator and commitment execute_sorted_list_accumulator_round(); } { - ZoneScopedN("execute_log_derivative_inverse_round"); + // ZoneScopedN("execute_log_derivative_inverse_round"); // Fiat-Shamir: beta & gamma execute_log_derivative_inverse_round(); } { - ZoneScopedN("execute_grand_product_computation_round"); + // ZoneScopedN("execute_grand_product_computation_round"); // Compute grand product(s) and commitments. execute_grand_product_computation_round(); } From 1393754f77801b2f6dd4c01f87d26402f6c8176d Mon Sep 17 00:00:00 2001 From: Cody Date: Thu, 26 Sep 2024 01:00:04 -0400 Subject: [PATCH 09/21] local-side changes for remote build local execution --- ...nchmark_tracy_build_mainframe_run_local.sh | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100755 barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh new file mode 100755 index 00000000000..b7004a0df26 --- /dev/null +++ b/barretenberg/cpp/scripts/benchmark_tracy_build_mainframe_run_local.sh @@ -0,0 +1,49 @@ + +# NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe +# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW +# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler +# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked +# however for any SSH setup, especially an ubuntu one. +# on local machine run: +# export USER=... +# export PRESET=...tracy for memory or tracy-gates for circuit gates... +# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER +set -eux +USER=${1:-$USER} +BOX=$USER-box +BENCHMARK=${2:-protogalaxy_bench} +COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=fold_k/17} + +# Can also set PRESET=tracy-gates env variable +PRESET=${PRESET:-tracy-time} + +wait # TODO(AD) hack - not sure why needed +! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy +cd ~/tracy +git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 +cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release +cmake --build profiler/build --parallel +cd - + +ssh $BOX " + set -eux ; + cd ~/aztec-packages/barretenberg/cpp/ ; + cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; +" & +wait +if [ ! -d build-$PRESET/bin ]; then + echo build-$PRESET/bin; + mkdir -p build-$PRESET/bin; +fi +scp $BOX:/mnt/user-data/$USER/aztec-packages/barretenberg/cpp/build-$PRESET/bin/$BENCHMARK build-$PRESET/bin/. ; +! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ; +cd ~/tracy/capture ; + git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ; +mkdir -p build && cd build && cmake .. && make -j ; + +./tracy-capture -a 127.0.0.1 -f -o ../trace-$BENCHMARK & +sleep 0.1 ; +cd ~/aztec-packages/barretenberg/cpp/build-$PRESET/ +$COMMAND ; + +~/tracy/profiler/build/tracy-profiler ~/tracy/capture/trace-$BENCHMARK From 0c2b10fcafc7deff05a82656e755755c1665309d Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 26 Sep 2024 14:20:24 +0000 Subject: [PATCH 10/21] CIVC bench with two folds --- .../benchmark/client_ivc_bench/client_ivc.bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp index ef198ebd595..06aeb41f366 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/client_ivc_bench/client_ivc.bench.cpp @@ -101,7 +101,7 @@ BENCHMARK_DEFINE_F(ClientIVCBench, Full)(benchmark::State& state) verify_ivc(proof, ivc); } -#define ARGS Arg(ClientIVCBench::NUM_ITERATIONS_MEDIUM_COMPLEXITY)->Arg(1) +#define ARGS Arg(ClientIVCBench::NUM_ITERATIONS_MEDIUM_COMPLEXITY)->Arg(2) BENCHMARK_REGISTER_F(ClientIVCBench, Full)->Unit(benchmark::kMillisecond)->ARGS; From 08fc540bed76e4f928761b1f7ab1748503a1c958 Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 26 Sep 2024 14:18:22 +0000 Subject: [PATCH 11/21] Prototype improvements to extend_to --- .../relations_bench/barycentric.bench.cpp | 30 +++++++++++++++++-- .../barretenberg/polynomials/univariate.hpp | 24 +++++++++++---- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp index 64db936c71a..d6bdd81cd51 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp @@ -14,14 +14,38 @@ using bb::Univariate; namespace bb::benchmark { -void extend_2_to_6(State& state) noexcept +void extend_2_to_11(State& state) noexcept { auto univariate = Univariate::get_random(); for (auto _ : state) { - DoNotOptimize(univariate.extend_to<6>()); + DoNotOptimize(univariate.extend_to<11>()); } } -BENCHMARK(extend_2_to_6); + +// 93.9s goes down to 62.7 +// Theoretical min: 1 sub, 9 additions at about 3.8ns each, 38ns +void fake_extend_2_to_11(State& state) noexcept +{ + std::array univariate; + std::generate(univariate.begin(), univariate.end(), [&]() { return FF::random_element(); }); + + const auto extend_to_11 = [](auto& arr) { + FF tmp = arr[1]; + const FF delta = tmp - arr[0]; + for (size_t idx = 2; idx < 10; idx++) { + arr[idx] = (tmp += delta); // fused ~> 62.9ns; non-fused ~>69.5ns + } + arr[10] = tmp; // save one +=; + return arr; + }; + + for (auto _ : state) { + DoNotOptimize(extend_to_11(univariate)); + } +} + +BENCHMARK(extend_2_to_11); +BENCHMARK(fake_extend_2_to_11); } // namespace bb::benchmark diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp index 01fe32d72c6..9282b8eef59 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp @@ -83,8 +83,22 @@ template Univariate extend_to() const { - const size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start; + static constexpr size_t EXTENDED_LENGTH = EXTENDED_DOMAIN_END - domain_start; using Data = BarycentricData; static_assert(EXTENDED_LENGTH >= LENGTH); @@ -370,8 +384,8 @@ template Date: Thu, 26 Sep 2024 18:04:44 +0000 Subject: [PATCH 12/21] Implement and use self_extend_from --- .../benchmark/relations_bench/barycentric.bench.cpp | 12 ++++++++++++ .../barretenberg/polynomials/barycentric.test.cpp | 12 ++++++++++++ .../cpp/src/barretenberg/polynomials/univariate.hpp | 12 ++++++++++++ .../protogalaxy/protogalaxy_prover_internal.hpp | 8 +++++--- .../cpp/src/barretenberg/ultra_honk/decider_keys.hpp | 11 +++++------ 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp index d6bdd81cd51..243f4f4137c 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/barycentric.bench.cpp @@ -44,8 +44,20 @@ void fake_extend_2_to_11(State& state) noexcept } } +// 93.9s goes down to 62.7 +// Theoretical min: 1 sub, 9 additions at about 3.8ns each, 38ns +void self_extend_2_to_11(State& state) noexcept +{ + auto univariate = Univariate::get_random(); + + for (auto _ : state) { + univariate.self_extend_from<2>(); + } +} + BENCHMARK(extend_2_to_11); BENCHMARK(fake_extend_2_to_11); +BENCHMARK(self_extend_2_to_11); } // namespace bb::benchmark diff --git a/barretenberg/cpp/src/barretenberg/polynomials/barycentric.test.cpp b/barretenberg/cpp/src/barretenberg/polynomials/barycentric.test.cpp index a481f9fc64e..2f708175430 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/barycentric.test.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/barycentric.test.cpp @@ -36,6 +36,18 @@ TYPED_TEST(BarycentricDataTests, Extend) EXPECT_EQ(result, expected_result); } +TYPED_TEST(BarycentricDataTests, SelfExtend) +{ + BARYCENTIC_DATA_TESTS_TYPE_ALIASES + static constexpr size_t initial_size(2); + static constexpr size_t domain_size(10); + static constexpr size_t skip_count(0); + auto f = Univariate({ 1, 2, 0, 0, 0, 0, 0, 0, 0, 0 }); + auto expected_result = Univariate({ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }); + f.template self_extend_from(); + EXPECT_EQ(f, expected_result); +} + TYPED_TEST(BarycentricDataTests, Evaluate) { BARYCENTIC_DATA_TESTS_TYPE_ALIASES diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp index 9282b8eef59..2a327dff94f 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp @@ -481,6 +481,18 @@ template void self_extend_from() + { + if constexpr (INITIAL_LENGTH == 2) { + const Fr delta = value_at(1) - value_at(0); + Fr next = value_at(1); + for (size_t idx = 2; idx < LENGTH; idx++) { + next += delta; + value_at(idx) = next; + } + } + } + /** * @brief Evaluate a univariate at a point u not known at compile time * and assumed not to be in the domain (else we divide by zero). diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp index 4fd90abed34..ac3b9d14c28 100644 --- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp @@ -200,9 +200,11 @@ template class ProtogalaxyProverInternal { const DeciderPKs& keys, const size_t row_idx) { - const auto base_univariates = keys.template row_to_univariates(row_idx); - for (auto [extended_univariate, base_univariate] : zip_view(extended_univariates.get_all(), base_univariates)) { - extended_univariate = base_univariate.template extend_to(); + auto incoming_univariates = keys.template row_to_univariates(row_idx); + for (auto [extended_univariate, incoming_univariate] : + zip_view(extended_univariates.get_all(), incoming_univariates)) { + incoming_univariate.template self_extend_from(); + extended_univariate = incoming_univariate; } } diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_keys.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_keys.hpp index e6ef907c622..49b72bbd0e2 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_keys.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_keys.hpp @@ -53,19 +53,18 @@ template struct DeciderProvingKeys_ { * @tparam skip_count Construct univariates that skip some of the indices when computing results * @return The univariates whose extensions will be used to construct the combiner. */ - template auto row_to_univariates(size_t row_idx) const + template auto row_to_univariates(size_t row_idx) const { auto prover_polynomials_views = get_polynomials_views(); - std::array, prover_polynomials_views[0].size()> results; + std::array, prover_polynomials_views[0].size()> results; // Set the size corresponding to the number of rows in the execution trace - size_t pk_idx = 0; // Iterate over the prover polynomials' views corresponding to each proving key - for (auto& get_all : prover_polynomials_views) { + for (size_t dpk_idx = 0; auto& get_all : prover_polynomials_views) { // Iterate over all columns in the trace execution of an proving key and extract their value at row_idx. for (auto [result, poly_ptr] : zip_view(results, get_all)) { - result.evaluations[pk_idx] = poly_ptr[row_idx]; + result.evaluations[dpk_idx] = poly_ptr[row_idx]; } - pk_idx++; + dpk_idx++; } return results; } From bf69253b1113946904a7647d622ff8cf92892d5e Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 26 Sep 2024 18:20:16 +0000 Subject: [PATCH 13/21] revert change to extend_to --- barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp index 2a327dff94f..f86fc99ae2e 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/univariate.hpp @@ -384,8 +384,8 @@ template Date: Thu, 26 Sep 2024 18:30:25 +0000 Subject: [PATCH 14/21] Does std::move help? --- .../barretenberg/protogalaxy/protogalaxy_prover_internal.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp index ac3b9d14c28..aec2aeaf2c3 100644 --- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_internal.hpp @@ -204,7 +204,7 @@ template class ProtogalaxyProverInternal { for (auto [extended_univariate, incoming_univariate] : zip_view(extended_univariates.get_all(), incoming_univariates)) { incoming_univariate.template self_extend_from(); - extended_univariate = incoming_univariate; + extended_univariate = std::move(incoming_univariate); } } From 12d655789d7c417610372903025ebd0a54fea389 Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 26 Sep 2024 19:38:57 +0000 Subject: [PATCH 15/21] WIP using macros well --- barretenberg/cpp/CMakeLists.txt | 5 +++++ barretenberg/cpp/CMakePresets.json | 9 +++++---- .../barretenberg/benchmark/ultra_bench/mock_circuits.hpp | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/barretenberg/cpp/CMakeLists.txt b/barretenberg/cpp/CMakeLists.txt index 3b4e3c8a98f..c01c462bd63 100644 --- a/barretenberg/cpp/CMakeLists.txt +++ b/barretenberg/cpp/CMakeLists.txt @@ -61,6 +61,11 @@ else() SET(TRACY_LIBS) endif() +if(TRACY_PROFILE_MEMORY) + add_compile_options(-DTRACY_MEMORY) +endif() + + if(ENABLE_ASAN) add_compile_options(-fsanitize=address) add_link_options(-fsanitize=address) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index d8898f91c43..b612421c921 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -111,13 +111,14 @@ } }, { - "name": "tracy", + "name": "tracy-memory", "displayName": "Release build with tracy, optimized for memory tracking", "description": "Release build with tracy, optimized for memory tracking", "inherits": "clang16", "binaryDir": "build-tracy", "cacheVariables": { - "ENABLE_TRACY": "ON" + "ENABLE_TRACY": "ON", + "TRACY_PROFILE_MEMORY": "ON" } }, { @@ -503,9 +504,9 @@ "configurePreset": "clang16-dbg" }, { - "name": "tracy", + "name": "tracy-memory", "inherits": "default", - "configurePreset": "tracy" + "configurePreset": "tracy-memory" }, { "name": "tracy-time", diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp index 31c8aa46bd2..0d2787095f3 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/mock_circuits.hpp @@ -54,7 +54,9 @@ Prover get_prover(void (*test_circuit_function)(typename Prover::Flavor::Circuit Composer composer; return composer.create_prover(builder); } else { - // ZoneScopedN("creating prover"); +#ifdef TRACY_MEMORY + ZoneScopedN("creating prover"); +#endif return Prover(builder); } }; From be4756f28c0fef56044948b63d2aa93bf78fa983 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Thu, 26 Sep 2024 22:15:09 +0000 Subject: [PATCH 16/21] delete two tracy benchmark scripts --- .../scripts/benchmark_tracy_build_local.sh | 31 -------------- ...chmark_tracy_build_on_benching_instance.sh | 42 ------------------- 2 files changed, 73 deletions(-) delete mode 100755 barretenberg/cpp/scripts/benchmark_tracy_build_local.sh delete mode 100644 barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_local.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_local.sh deleted file mode 100755 index a31743bc23b..00000000000 --- a/barretenberg/cpp/scripts/benchmark_tracy_build_local.sh +++ /dev/null @@ -1,31 +0,0 @@ - -# NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe -# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW -# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler -# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked -# however for any SSH setup, especially an ubuntu one. -# on local machine run: -# export USER=... -# export PRESET=...tracy for memory or tracy-gates for circuit gates... -# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER -set -eux -USER=${1:-$USER} -BOX=$USER-box -BENCHMARK=${2:-protogalaxy_bench} -COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=compute_row_evaluations/17} - -# Can also set PRESET=tracy-gates env variable -PRESET=${PRESET:-tracy-time} - -cd ~/aztec-packages/barretenberg/cpp/ -cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK -! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy -cd ~/tracy/capture - git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 -sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev libtbb-dev libfreetype-dev -mkdir -p build && cd build && cmake .. && make -j -./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & -sleep 0.1 -cd ~/aztec-packages/barretenberg/cpp/build-$PRESET -ninja $BENCHMARK -sudo $COMMAND \ No newline at end of file diff --git a/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh b/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh deleted file mode 100644 index 0bbe528a8b5..00000000000 --- a/barretenberg/cpp/scripts/benchmark_tracy_build_on_benching_instance.sh +++ /dev/null @@ -1,42 +0,0 @@ - -# NOTE: intended to be ran from one's external computer, connecting to Aztec mainframe -# IF ON YOUR LOCAL COMPUTER USE NORMAL INTERACTIVE TRACY WORKFLOW -# the benchmark runs with headless capture and then we copy the trace file and run tracy profiler -# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked -# however for any SSH setup, especially an ubuntu one. -# on local machine run: -# export USER=... -# export PRESET=...tracy for memory or tracy-gates for circuit gates... -# ssh $USER-box "cat ~/aztec-packages/barretenberg/cpp/scripts/benchmark_tracy.sh" | bash /dev/stdin $USER -set -eux -USER=${1:-$USER} -BOX=$USER-box -BENCHMARK=${2:-protogalaxy_bench} -COMMAND=${3:-./bin/$BENCHMARK --benchmark_filter=compute_row_evaluations/17} - -# Can also set PRESET=tracy-gates env variable -PRESET=${PRESET:-tracy} - -ssh $BB_SSH_in " - set -eux ; - cd /mnt/user-data/$USER/aztec-packages/barretenberg/cpp/ ; - cmake --preset $PRESET && cmake --build --preset $PRESET --target $BENCHMARK ; - ! [ -d /mnt/user-data/$USER/tracy ] && git clone https://github.com/wolfpld/tracy /mnt/user-data/$USER/tracy ; - cd /mnt/user-data/$USER/tracy/capture ; - git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 ; - sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ; - mkdir -p build && cd build && cmake .. && make -j ; - sudo ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK & ; - sleep 0.1 ; - cd /mnt/user-data/$USER/aztec-packages/barretenberg/cpp/build-$PRESET ; - ninja $BENCHMARK ; - sudo $COMMAND ; -" & -# wait # TODO(AD) hack - not sure why needed -# ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy -# cd ~/tracy -# git checkout 075395620a504c0cdcaf9bab3d196db16a043de7 # release 0.11.0 -# cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release -# cmake --build profiler/build --parallel -# scp $BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK . -# ~/tracy/profiler/build/tracy-profiler trace-$BENCHMARK From 6e69ec8156dcfbc83376f57a7caeb7c99215a720 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Thu, 26 Sep 2024 22:15:36 +0000 Subject: [PATCH 17/21] test out TRACY_MEMORY variable to see if it works (it does) --- barretenberg/cpp/CMakePresets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index b612421c921..882dbb9b9f3 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -115,7 +115,7 @@ "displayName": "Release build with tracy, optimized for memory tracking", "description": "Release build with tracy, optimized for memory tracking", "inherits": "clang16", - "binaryDir": "build-tracy", + "binaryDir": "build-tracy-memory", "cacheVariables": { "ENABLE_TRACY": "ON", "TRACY_PROFILE_MEMORY": "ON" From bd7324a7b5baa4ce21e94af85cd6e46d78d4c797 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Thu, 26 Sep 2024 22:55:30 +0000 Subject: [PATCH 18/21] use TRACY_MEMORY flag so that zones don't affect other tracy builds --- .../execution_trace/execution_trace.cpp | 35 ++++++++++++---- .../execution_trace/execution_trace.hpp | 15 +++++-- .../cpp/src/barretenberg/flavor/flavor.hpp | 5 ++- .../cpp/src/barretenberg/goblin/goblin.hpp | 40 +++++++++++++++---- .../composer/permutation_lib.hpp | 15 +++++-- .../protogalaxy/protogalaxy_prover_impl.hpp | 10 ++++- .../srs/factories/file_crs_factory.hpp | 5 ++- .../stdlib_circuit_builders/ultra_flavor.hpp | 10 ++++- .../src/barretenberg/sumcheck/sumcheck.hpp | 10 ++++- .../barretenberg/sumcheck/sumcheck_round.hpp | 10 ++++- .../ultra_honk/decider_prover.cpp | 5 ++- .../ultra_honk/decider_proving_key.hpp | 10 ++++- .../barretenberg/ultra_honk/oink_prover.cpp | 25 +++++++++--- 13 files changed, 156 insertions(+), 39 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp index 975847057a8..2e2b1e1015a 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.cpp @@ -9,7 +9,10 @@ namespace bb { template void ExecutionTrace_::populate(Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { - // ZoneScopedN("trace populate"); + +#ifdef TRACY_MEMORY + ZoneScopedN("trace populate"); +#endif // Share wire polynomials, selector polynomials between proving key and builder and copy cycles from raw circuit // data auto trace_data = construct_trace_data(builder, proving_key, is_structured); @@ -18,18 +21,27 @@ void ExecutionTrace_::populate(Builder& builder, typename Flavor::Provin proving_key.pub_inputs_offset = trace_data.pub_inputs_offset; } if constexpr (IsUltraPlonkOrHonk) { - // ZoneScopedN("add_memory_records_to_proving_key"); + +#ifdef TRACY_MEMORY + ZoneScopedN("add_memory_records_to_proving_key"); +#endif add_memory_records_to_proving_key(trace_data, builder, proving_key); } if constexpr (IsGoblinFlavor) { - // ZoneScopedN("add_ecc_op_wires_to_proving_key"); + +#ifdef TRACY_MEMORY + ZoneScopedN("add_ecc_op_wires_to_proving_key"); +#endif add_ecc_op_wires_to_proving_key(builder, proving_key); } // Compute the permutation argument polynomials (sigma/id) and add them to proving key { - // ZoneScopedN("compute_permutation_argument_polynomials"); + +#ifdef TRACY_MEMORY + ZoneScopedN("compute_permutation_argument_polynomials"); +#endif compute_permutation_argument_polynomials(builder, &proving_key, trace_data.copy_cycles); } } @@ -55,7 +67,10 @@ template typename ExecutionTrace_::TraceData ExecutionTrace_::construct_trace_data( Builder& builder, typename Flavor::ProvingKey& proving_key, bool is_structured) { - // ZoneScopedN("construct_trace_data"); + +#ifdef TRACY_MEMORY + ZoneScopedN("construct_trace_data"); +#endif TraceData trace_data{ builder, proving_key }; // Complete the public inputs execution trace block from builder.public_inputs @@ -70,7 +85,10 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t // Update wire polynomials and copy cycles // NB: The order of row/column loops is arbitrary but needs to be row/column to match old copy_cycle code { - // ZoneScopedN("populating wires and copy_cycles"); + +#ifdef TRACY_MEMORY + ZoneScopedN("populating wires and copy_cycles"); +#endif for (uint32_t block_row_idx = 0; block_row_idx < block_size; ++block_row_idx) { for (uint32_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { uint32_t var_idx = block.wires[wire_idx][block_row_idx]; // an index into the variables array @@ -113,7 +131,10 @@ typename ExecutionTrace_::TraceData ExecutionTrace_::construct_t template void ExecutionTrace_::populate_public_inputs_block(Builder& builder) { - // ZoneScopedN("populate_public_inputs_block"); + +#ifdef TRACY_MEMORY + ZoneScopedN("populate_public_inputs_block"); +#endif // Update the public inputs block for (auto& idx : builder.public_inputs) { for (size_t wire_idx = 0; wire_idx < NUM_WIRES; ++wire_idx) { diff --git a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp index 782abdb10e4..814e69d2620 100644 --- a/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp +++ b/barretenberg/cpp/src/barretenberg/execution_trace/execution_trace.hpp @@ -28,7 +28,10 @@ template class ExecutionTrace_ { TraceData(Builder& builder, ProvingKey& proving_key) { - // ZoneScopedN("TraceData constructor"); + +#ifdef TRACY_MEMORY + ZoneScopedN("TraceData constructor"); +#endif if constexpr (IsHonkFlavor) { // Initialize and share the wire and selector polynomials for (auto [wire, other_wire] : zip_view(wires, proving_key.polynomials.get_wires())) { @@ -46,7 +49,10 @@ template class ExecutionTrace_ { proving_key.polynomial_store.put(wire_tag, wires[idx].share()); } { - // ZoneScopedN("selector initialization"); + +#ifdef TRACY_MEMORY + ZoneScopedN("selector initialization"); +#endif for (size_t idx = 0; idx < Builder::Arithmetization::NUM_SELECTORS; ++idx) { selectors[idx] = Polynomial(proving_key.circuit_size); std::string selector_tag = builder.selector_names[idx] + "_lagrange"; @@ -55,7 +61,10 @@ template class ExecutionTrace_ { } } { - // ZoneScopedN("copy cycle initialization"); + +#ifdef TRACY_MEMORY + ZoneScopedN("copy cycle initialization"); +#endif copy_cycles.resize(builder.variables.size()); } } diff --git a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp index cae9d637a60..f67d019ba69 100644 --- a/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/flavor/flavor.hpp @@ -129,7 +129,10 @@ template class ProvingKey_ { std::shared_ptr commitment_key = nullptr) { if (commitment_key == nullptr) { - // ZoneScopedN("init commitment key"); + +#ifdef TRACY_MEMORY + ZoneScopedN("init commitment key"); +#endif this->commitment_key = std::make_shared(circuit_size); } else { // Don't create another commitment key if we already have one diff --git a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp index 594f04a6fdf..4ef27dbcd9c 100644 --- a/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp +++ b/barretenberg/cpp/src/barretenberg/goblin/goblin.hpp @@ -171,17 +171,26 @@ class GoblinProver { void prove_eccvm() { { - // ZoneScopedN("Create ECCVMBuilder and ECCVMProver"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Create ECCVMBuilder and ECCVMProver"); +#endif auto eccvm_builder = std::make_unique(op_queue); eccvm_prover = std::make_unique(*eccvm_builder); } { - // ZoneScopedN("Construct ECCVM Proof"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Construct ECCVM Proof"); +#endif goblin_proof.eccvm_proof = eccvm_prover->construct_proof(); } { - // ZoneScopedN("Assign Translation Evaluations"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Assign Translation Evaluations"); +#endif goblin_proof.translation_evaluations = eccvm_prover->translation_evaluations; } } @@ -198,14 +207,20 @@ class GoblinProver { eccvm_key = eccvm_prover->key; eccvm_prover = nullptr; { - // ZoneScopedN("Create TranslatorBuilder and TranslatorProver"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Create TranslatorBuilder and TranslatorProver"); +#endif auto translator_builder = std::make_unique(translation_batching_challenge_v, evaluation_challenge_x, op_queue); translator_prover = std::make_unique(*translator_builder, transcript); } { - // ZoneScopedN("Construct Translator Proof"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Construct Translator Proof"); +#endif goblin_proof.translator_proof = translator_prover->construct_proof(); } } @@ -219,14 +234,23 @@ class GoblinProver { */ GoblinProof prove(MergeProof merge_proof_in = {}) { - // ZoneScopedN("Goblin::prove"); + +#ifdef TRACY_MEMORY + ZoneScopedN("Goblin::prove"); +#endif goblin_proof.merge_proof = merge_proof_in.empty() ? std::move(merge_proof) : std::move(merge_proof_in); { - // ZoneScopedN("prove_eccvm"); + +#ifdef TRACY_MEMORY + ZoneScopedN("prove_eccvm"); +#endif prove_eccvm(); } { - // ZoneScopedN("prove_translator"); + +#ifdef TRACY_MEMORY + ZoneScopedN("prove_translator"); +#endif prove_translator(); } return goblin_proof; diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp index 47739f215be..3a87d0efdab 100644 --- a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp +++ b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/composer/permutation_lib.hpp @@ -62,7 +62,10 @@ template struct PermutationMapping { */ PermutationMapping(size_t circuit_size) { - // ZoneScopedN("PermutationMapping constructor"); + +#ifdef TRACY_MEMORY + ZoneScopedN("PermutationMapping constructor"); +#endif for (uint8_t col_idx = 0; col_idx < NUM_WIRES; ++col_idx) { sigmas[col_idx].reserve(circuit_size); if constexpr (generalized) { @@ -386,12 +389,18 @@ void compute_permutation_argument_polynomials(const typename Flavor::CircuitBuil } else if constexpr (IsUltraFlavor) { // any UltraHonk flavor // Compute Honk-style sigma and ID polynomials from the corresponding mappings { - // ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + +#ifdef TRACY_MEMORY + ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); +#endif compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_sigmas(), mapping.sigmas, key); } { - // ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); + +#ifdef TRACY_MEMORY + ZoneScopedN("compute_honk_style_permutation_lagrange_polynomials_from_mapping"); +#endif compute_honk_style_permutation_lagrange_polynomials_from_mapping( key->polynomials.get_ids(), mapping.ids, key); } diff --git a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp index 29ac12fb505..85266185d91 100644 --- a/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp +++ b/barretenberg/cpp/src/barretenberg/protogalaxy/protogalaxy_prover_impl.hpp @@ -11,7 +11,10 @@ template void ProtogalaxyProver_::run_oink_prover_on_one_incomplete_key(std::shared_ptr keys, const std::string& domain_separator) { - // ZoneScopedN("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); + +#ifdef TRACY_MEMORY + ZoneScopedN("ProtogalaxyProver::run_oink_prover_on_one_incomplete_key"); +#endif OinkProver oink_prover(keys, transcript, domain_separator + '_'); oink_prover.prove(); } @@ -159,7 +162,10 @@ FoldingResult ProtogalaxyProver_ FoldingResult ProtogalaxyProver_::prove() { - // ZoneScopedN("ProtogalaxyProver::prove"); + +#ifdef TRACY_MEMORY + ZoneScopedN("ProtogalaxyProver::prove"); +#endif BB_OP_COUNT_TIME_NAME("ProtogalaxyProver::prove"); // Ensure keys are all of the same size for (size_t idx = 0; idx < DeciderProvingKeys::NUM - 1; ++idx) { diff --git a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp index b28dfe0c056..3da7f409894 100644 --- a/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp +++ b/barretenberg/cpp/src/barretenberg/srs/factories/file_crs_factory.hpp @@ -44,7 +44,10 @@ template class FileProverCrs : public ProverCrs { FileProverCrs(const size_t num_points, std::string const& path) : num_points(num_points) { - // ZoneScopedN("FileProverCrs constructor"); + +#ifdef TRACY_MEMORY + ZoneScopedN("FileProverCrs constructor"); +#endif monomials_ = scalar_multiplication::point_table_alloc(num_points); srs::IO::read_transcript_g1(monomials_.get(), num_points, path); diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp index 0dea61f90a3..b876e292031 100644 --- a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp +++ b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/ultra_flavor.hpp @@ -296,7 +296,10 @@ class UltraFlavor { ProverPolynomials() = default; ProverPolynomials(size_t circuit_size) { - // ZoneScopedN("creating empty prover polys"); + +#ifdef TRACY_MEMORY + ZoneScopedN("creating empty prover polys"); +#endif for (auto& poly : get_to_be_shifted()) { poly = Polynomial{ /*memory size*/ circuit_size - 1, /*largest possible index*/ circuit_size, @@ -562,7 +565,10 @@ class UltraFlavor { PartiallyEvaluatedMultivariates() = default; PartiallyEvaluatedMultivariates(const size_t circuit_size) { - // ZoneScopedN("PartiallyEvaluatedMultivariates constructor"); + +#ifdef TRACY_MEMORY + ZoneScopedN("PartiallyEvaluatedMultivariates constructor"); +#endif // Storage is only needed after the first partial evaluation, hence polynomials of // size (n / 2) for (auto& poly : this->get_all()) { diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp index ba5a3f2a06e..76b8457a858 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck.hpp @@ -209,7 +209,10 @@ template class SumcheckProver { auto round_univariate = round.compute_univariate( round_idx, full_polynomials, relation_parameters, gate_separators, alpha, zk_sumcheck_data); { - // ZoneScopedN("rest of sumcheck round 1"); + +#ifdef TRACY_MEMORY + ZoneScopedN("rest of sumcheck round 1"); +#endif // Place the evaluations of the round univariate into transcript. transcript->send_to_verifier("Sumcheck:univariate_0", round_univariate); @@ -227,7 +230,10 @@ template class SumcheckProver { // We operate on partially_evaluated_polynomials in place. } for (size_t round_idx = 1; round_idx < multivariate_d; round_idx++) { - // ZoneScopedN("sumcheck loop"); + +#ifdef TRACY_MEMORY + ZoneScopedN("sumcheck loop"); +#endif // Write the round univariate to the transcript round_univariate = round.compute_univariate(round_idx, partially_evaluated_polynomials, diff --git a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp index 37325072d5f..30ca1b0d536 100644 --- a/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp +++ b/barretenberg/cpp/src/barretenberg/sumcheck/sumcheck_round.hpp @@ -66,7 +66,10 @@ template class SumcheckProverRound { SumcheckProverRound(size_t initial_round_size) : round_size(initial_round_size) { - // ZoneScopedN("SumcheckProverRound constructor"); + +#ifdef TRACY_MEMORY + ZoneScopedN("SumcheckProverRound constructor"); +#endif // Initialize univariate accumulators to 0 Utils::zero_univariates(univariate_accumulators); } @@ -161,7 +164,10 @@ template class SumcheckProverRound { const RelationSeparator alpha, std::optional> zk_sumcheck_data = std::nullopt) // only submitted when Flavor HasZK { - // ZoneScopedN("compute_univariate"); + +#ifdef TRACY_MEMORY + ZoneScopedN("compute_univariate"); +#endif BB_OP_COUNT_TIME(); // Determine number of threads for multithreading. diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp index 13baaf194a0..bc72f0ea3c7 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_prover.cpp @@ -31,7 +31,10 @@ template void DeciderProver_::execute_relation_ch size_t polynomial_size = proving_key->proving_key.circuit_size; auto sumcheck = Sumcheck(polynomial_size, transcript); { - // ZoneScopedN("sumcheck.prove"); + +#ifdef TRACY_MEMORY + ZoneScopedN("sumcheck.prove"); +#endif sumcheck_output = sumcheck.prove(proving_key->proving_key.polynomials, proving_key->relation_parameters, proving_key->alphas, diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp index f5a9accd165..5180db2d0cf 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp @@ -68,14 +68,20 @@ template class DeciderProvingKey_ { circuit.op_queue->append_nonzero_ops(); } { - // ZoneScopedN("constructing proving key"); + +#ifdef TRACY_MEMORY + ZoneScopedN("constructing proving key"); +#endif proving_key = ProvingKey(dyadic_circuit_size, circuit.public_inputs.size(), commitment_key); } // Construct and add to proving key the wire, selector and copy constraint polynomials Trace::populate(circuit, proving_key, is_structured); - // ZoneScopedN("constructing prover instance after trace populate"); + +#ifdef TRACY_MEMORY + ZoneScopedN("constructing prover instance after trace populate"); +#endif // If Goblin, construct the databus polynomials if constexpr (IsGoblinFlavor) { diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp index 38987abe796..5eb895dcda3 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp @@ -14,29 +14,44 @@ namespace bb { template void OinkProver::prove() { { - // ZoneScopedN("execute_preamble_round"); + +#ifdef TRACY_MEMORY + ZoneScopedN("execute_preamble_round"); +#endif // Add circuit size public input size and public inputs to transcript-> execute_preamble_round(); } { - // ZoneScopedN("execute_wire_commitments_round"); + +#ifdef TRACY_MEMORY + ZoneScopedN("execute_wire_commitments_round"); +#endif // Compute first three wire commitments execute_wire_commitments_round(); } { - // ZoneScopedN("execute_sorted_list_accumulator_round"); + +#ifdef TRACY_MEMORY + ZoneScopedN("execute_sorted_list_accumulator_round"); +#endif // Compute sorted list accumulator and commitment execute_sorted_list_accumulator_round(); } { - // ZoneScopedN("execute_log_derivative_inverse_round"); + +#ifdef TRACY_MEMORY + ZoneScopedN("execute_log_derivative_inverse_round"); +#endif // Fiat-Shamir: beta & gamma execute_log_derivative_inverse_round(); } { - // ZoneScopedN("execute_grand_product_computation_round"); + +#ifdef TRACY_MEMORY + ZoneScopedN("execute_grand_product_computation_round"); +#endif // Compute grand product(s) and commitments. execute_grand_product_computation_round(); } From 21c7599143e23c912c42c03ac5e6e6f419739853 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Fri, 27 Sep 2024 18:44:50 +0000 Subject: [PATCH 19/21] remove tracy-default preset --- barretenberg/cpp/CMakePresets.json | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 882dbb9b9f3..8b8f31f9320 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -137,21 +137,6 @@ "ENABLE_TRACY": "ON" } }, - { - "name": "tracy-default", - "displayName": "Release build with tracy using default clang", - "description": "Release build with tracy using default clang", - "inherits": "default", - "binaryDir": "build-tracy", - "cacheVariables": { - "ENABLE_TRACY": "ON", - "HAVE_STD_REGEX": "ON" - }, - "environment": { - "CXXFLAGS": "-DBB_USE_OP_COUNT -DBB_USE_OP_COUNT_TIME_ONLY" - } - }, - { "name": "tracy-gates", "displayName": "Release build with tracy - but hacked for gate tracking", From ad0326d4633325fe2addb941fddb69cec1d3f092 Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Fri, 27 Sep 2024 21:14:11 +0000 Subject: [PATCH 20/21] add ZoneScoped to BB_OP_COUNT_TIME --- barretenberg/cpp/CMakeLists.txt | 4 ++++ barretenberg/cpp/CMakePresets.json | 3 ++- .../cpp/src/barretenberg/common/op_count.hpp | 12 ++++++++++-- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/CMakeLists.txt b/barretenberg/cpp/CMakeLists.txt index fadb1aaec23..db289b54800 100644 --- a/barretenberg/cpp/CMakeLists.txt +++ b/barretenberg/cpp/CMakeLists.txt @@ -65,6 +65,10 @@ if(TRACY_PROFILE_MEMORY) add_compile_options(-DTRACY_MEMORY) endif() +if(TRACY_PROFILE_TIME) + add_compile_options(-DTRACY_TIME) +endif() + if(ENABLE_ASAN) add_compile_options(-fsanitize=address) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 8b8f31f9320..c525451b8c0 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -134,7 +134,8 @@ "LDFLAGS": "-g -fno-omit-frame-pointer -rdynamic" }, "cacheVariables": { - "ENABLE_TRACY": "ON" + "ENABLE_TRACY": "ON", + "TRACY_PROFILE_TIME": "ON" } }, { diff --git a/barretenberg/cpp/src/barretenberg/common/op_count.hpp b/barretenberg/cpp/src/barretenberg/common/op_count.hpp index 8e4711d001a..af24ecb1e10 100644 --- a/barretenberg/cpp/src/barretenberg/common/op_count.hpp +++ b/barretenberg/cpp/src/barretenberg/common/op_count.hpp @@ -2,6 +2,7 @@ #pragma once #include +#include #ifndef BB_USE_OP_COUNT // require a semicolon to appease formatters // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) @@ -11,12 +12,19 @@ // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define BB_OP_COUNT_CYCLES_NAME(name) (void)0 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) -#define BB_OP_COUNT_TIME_NAME(name) (void)0 -// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define BB_OP_COUNT_CYCLES() (void)0 +#ifndef TRACY_TIME +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define BB_OP_COUNT_TIME_NAME(name) (void)0 // NOLINTNEXTLINE(cppcoreguidelines-macro-usage) #define BB_OP_COUNT_TIME() (void)0 #else +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define BB_OP_COUNT_TIME_NAME(name) ZoneScopedN(name) +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define BB_OP_COUNT_TIME() BB_OP_COUNT_TIME_NAME(__func__) +#endif +#else /** * Provides an abstraction that counts operations based on function names. * For efficiency, we spread out counts across threads. From 5bf2a581237bf9bb21e2b27d4cc2c4e7f2eb10aa Mon Sep 17 00:00:00 2001 From: lucasxia01 Date: Sat, 28 Sep 2024 00:02:52 +0000 Subject: [PATCH 21/21] add #ifdef TRACY_MEMORY to everywhere --- .../ultra_honk/decider_proving_key.hpp | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp index bfcf1b26aae..2bfd069a5a4 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp @@ -84,13 +84,17 @@ template class DeciderProvingKey_ { } else { // Allocate only a correct amount of memory for each polynomial // Allocate the wires and selectors polynomials { +#ifdef TRACY_MEMORY ZoneScopedN("allocating wires"); +#endif for (auto& wire : proving_key.polynomials.get_wires()) { wire = Polynomial::shiftable(proving_key.circuit_size); } } { +#ifdef TRACY_MEMORY ZoneScopedN("allocating gate selectors"); +#endif // Define gate selectors over the block they are isolated to for (auto [selector, block] : zip_view(proving_key.polynomials.get_gate_selectors(), circuit.blocks.get_gate_blocks())) { @@ -110,14 +114,18 @@ template class DeciderProvingKey_ { } } { +#ifdef TRACY_MEMORY ZoneScopedN("allocating non-gate selectors"); +#endif // Set the other non-gate selector polynomials to full size for (auto& selector : proving_key.polynomials.get_non_gate_selectors()) { selector = Polynomial(proving_key.circuit_size); } } if constexpr (IsGoblinFlavor) { +#ifdef TRACY_MEMORY ZoneScopedN("allocating ecc op wires and selector"); +#endif // Allocate the ecc op wires and selector const size_t ecc_op_block_size = circuit.blocks.ecc_op.get_fixed_size(is_structured); const size_t op_wire_offset = Flavor::has_zero_row ? 1 : 0; @@ -154,7 +162,9 @@ template class DeciderProvingKey_ { std::min(static_cast(MAX_LOOKUP_TABLES_SIZE), dyadic_circuit_size - 1); size_t table_offset = dyadic_circuit_size - max_tables_size; { +#ifdef TRACY_MEMORY ZoneScopedN("allocating table polynomials"); +#endif ASSERT(dyadic_circuit_size > max_tables_size); // Allocate the table polynomials @@ -165,7 +175,9 @@ template class DeciderProvingKey_ { } } { +#ifdef TRACY_MEMORY ZoneScopedN("allocating sigmas and ids"); +#endif for (auto& sigma : proving_key.polynomials.get_sigmas()) { sigma = typename Flavor::Polynomial(proving_key.circuit_size); } @@ -210,13 +222,17 @@ template class DeciderProvingKey_ { } } { +#ifdef TRACY_MEMORY ZoneScopedN("constructing z_perm"); +#endif // Allocate the z_perm polynomial proving_key.polynomials.z_perm = Polynomial::shiftable(proving_key.circuit_size); } { +#ifdef TRACY_MEMORY ZoneScopedN("allocating lagrange polynomials"); +#endif // First and last lagrange polynomials (in the full circuit size) proving_key.polynomials.lagrange_first = Polynomial(1, dyadic_circuit_size, 0); proving_key.polynomials.lagrange_last = Polynomial(1, dyadic_circuit_size, dyadic_circuit_size - 1); @@ -236,7 +252,9 @@ template class DeciderProvingKey_ { // If Goblin, construct the databus polynomials if constexpr (IsGoblinFlavor) { +#ifdef TRACY_MEMORY ZoneScopedN("constructing databus polynomials"); +#endif construct_databus_polynomials(circuit); } @@ -245,13 +263,17 @@ template class DeciderProvingKey_ { proving_key.polynomials.lagrange_last.at(dyadic_circuit_size - 1) = 1; { +#ifdef TRACY_MEMORY ZoneScopedN("constructing lookup table polynomials"); +#endif construct_lookup_table_polynomials( proving_key.polynomials.get_tables(), circuit, dyadic_circuit_size); } { +#ifdef TRACY_MEMORY ZoneScopedN("constructing lookup read counts"); +#endif construct_lookup_read_counts(proving_key.polynomials.lookup_read_counts, proving_key.polynomials.lookup_read_tags, circuit,