From 1ca392e1c93fc486caf3c43f4311f014163400df Mon Sep 17 00:00:00 2001 From: ludamad Date: Mon, 16 Oct 2023 13:27:20 +0000 Subject: [PATCH 01/14] feat: better profiling --- barretenberg/cpp/CMakePresets.json | 21 +++++++++++++++++++ .../scripts/collect_profile_information.sh | 7 ++++--- .../benchmark/honk_bench/main.simple.cpp | 13 +++++++++--- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 6bafa8fed7a..71547882825 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -183,6 +183,22 @@ "MULTITHREADING": "ON" } }, + { + "name": "xray-1thread-no-inline", + "displayName": "Build with single-threaded XRay Profiling", + "description": "Build with Clang and enable single-threaded LLVM XRay for profiling", + "generator": "Unix Makefiles", + "inherits": "clang16", + "environment": { + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions" + }, + "cacheVariables": { + "MULTITHREADING": "OFF" + }, + "binaryDir": "build-xray-1thread-no-inline" + }, { "name": "xray-1thread", "displayName": "Build with single-threaded XRay Profiling", @@ -303,6 +319,11 @@ "jobs": 0, "targets": ["barretenberg.wasm"] }, + { + "name": "xray-1thread-no-inline", + "configurePreset": "xray-1thread-no-inline", + "inherits": "default" + }, { "name": "xray-1thread", "configurePreset": "xray-1thread", diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 62757181ac3..798d27b1710 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -39,7 +39,8 @@ function shorten_cpp_names() { # Process benchmark file. llvm-xray-16 stack xray-log.honk_bench_main_simple.* \ --instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ - | node ../scripts/llvm_xray_stack_flame_corrector.js \ - | shorten_cpp_names \ - | ../scripts/flamegraph.pl > xray.svg + | node ../scripts/llvm_xray_stack_flame_corrector.js + # | shorten_cpp_names \ + # | ../scripts/flamegraph.pl --width 2000 --fontsize 10 \ + # > xray.svg echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser." diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index f33faf554d1..f41412bc62c 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -37,6 +37,13 @@ template void generate_sha256_test_circuit(Builder& builder, } } +[[clang::xray_always_instrument]] [[clang::noinline]] void profile_proving(auto& ext_prover) +{ + for (size_t i = 0; i < 10; i++) { + auto proof = ext_prover.construct_proof(); + } +} + /** * @brief Benchmark: Construction of a Ultra Honk proof for a circuit determined by the provided circuit function */ @@ -50,9 +57,9 @@ void construct_proof_ultra() noexcept auto composer = UltraHonk(); auto instance = composer.create_instance(builder); auto ext_prover = composer.create_prover(instance); - for (size_t i = 0; i < 10; i++) { - auto proof = ext_prover.construct_proof(); - } + // exercise thread pool + auto proof = ext_prover.construct_proof(); + profile_proving(ext_prover); } int main() From 2d90df8571e5033ec7e1e48735c496f0ef109266 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Oct 2023 23:32:56 +0000 Subject: [PATCH 02/14] Benchmark fixes --- .../scripts/collect_profile_information.sh | 8 ++-- .../benchmark/honk_bench/main.simple.cpp | 37 +++++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 798d27b1710..1262bb63b82 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -39,8 +39,8 @@ function shorten_cpp_names() { # Process benchmark file. llvm-xray-16 stack xray-log.honk_bench_main_simple.* \ --instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ - | node ../scripts/llvm_xray_stack_flame_corrector.js - # | shorten_cpp_names \ - # | ../scripts/flamegraph.pl --width 2000 --fontsize 10 \ - # > xray.svg + | node ../scripts/llvm_xray_stack_flame_corrector.js \ + | shorten_cpp_names \ + | ../scripts/flamegraph.pl --width 2000 --fontsize 10 \ + > xray.svg echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser." diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index f41412bc62c..7781ebff5b5 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -22,25 +22,33 @@ #include "barretenberg/stdlib/primitives/packed_byte_array/packed_byte_array.hpp" #include "barretenberg/stdlib/primitives/witness/witness.hpp" -using namespace proof_system::plonk; +// TODO(AD): put this into a header +#if defined(__clang__) +#define BBERG_INSTRUMENT [[clang::xray_always_instrument]] +#define BBERG_NOINLINE [[clang::noinline]] +#else +#define BBERG_INSTRUMENT +#define BBERG_NOINLINE +#endif -using UltraBuilder = proof_system::UltraCircuitBuilder; -using UltraHonk = proof_system::honk::UltraComposer; +using namespace proof_system; template void generate_sha256_test_circuit(Builder& builder, size_t num_iterations) { std::string in; in.resize(32); - proof_system::plonk::stdlib::packed_byte_array input(&builder, in); + plonk::stdlib::packed_byte_array input(&builder, in); for (size_t i = 0; i < num_iterations; i++) { - input = proof_system::plonk::stdlib::sha256(input); + input = plonk::stdlib::sha256(input); } } -[[clang::xray_always_instrument]] [[clang::noinline]] void profile_proving(auto& ext_prover) +BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(auto& ext_prover) { - for (size_t i = 0; i < 10; i++) { - auto proof = ext_prover.construct_proof(); + ext_prover.construct_proof(); + for (size_t i = 0; i < 1000; i++) { + // Bench sumcheck + ext_prover.execute_relation_check_rounds(); } } @@ -51,15 +59,12 @@ void construct_proof_ultra() noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); // Constuct circuit and prover; don't include this part in measurement - auto builder = typename UltraHonk::CircuitBuilder(); - generate_sha256_test_circuit(builder, 1); + honk::UltraComposer::CircuitBuilder builder; + generate_sha256_test_circuit(builder, 1); - auto composer = UltraHonk(); - auto instance = composer.create_instance(builder); - auto ext_prover = composer.create_prover(instance); - // exercise thread pool - auto proof = ext_prover.construct_proof(); - profile_proving(ext_prover); + honk::UltraComposer composer; + std::shared_ptr instance = composer.create_instance(builder); + honk::UltraProver ext_prover = composer.create_prover(instance); } int main() From 70fee3dabf93da4117a51fd77945413f5428ae02 Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Oct 2023 23:45:49 +0000 Subject: [PATCH 03/14] Benchmark fixes --- barretenberg/cpp/CMakePresets.json | 14 +++++++------- .../benchmark/honk_bench/main.simple.cpp | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 71547882825..d1c0f3e3eeb 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -184,15 +184,15 @@ } }, { - "name": "xray-1thread-no-inline", + "name": "xray-1thread-verbose", "displayName": "Build with single-threaded XRay Profiling", "description": "Build with Clang and enable single-threaded LLVM XRay for profiling", "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10 -fno-inline-functions" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500" }, "cacheVariables": { "MULTITHREADING": "OFF" @@ -206,9 +206,9 @@ "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" }, "cacheVariables": { "MULTITHREADING": "OFF" diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index 7781ebff5b5..dcb2caec089 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -46,7 +46,7 @@ template void generate_sha256_test_circuit(Builder& builder, BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(auto& ext_prover) { ext_prover.construct_proof(); - for (size_t i = 0; i < 1000; i++) { + for (size_t i = 0; i < 100; i++) { // Bench sumcheck ext_prover.execute_relation_check_rounds(); } @@ -60,7 +60,7 @@ void construct_proof_ultra() noexcept barretenberg::srs::init_crs_factory("../srs_db/ignition"); // Constuct circuit and prover; don't include this part in measurement honk::UltraComposer::CircuitBuilder builder; - generate_sha256_test_circuit(builder, 1); + generate_sha256_test_circuit(builder, 100); honk::UltraComposer composer; std::shared_ptr instance = composer.create_instance(builder); From 4b4f28557bb7e7f81c99f16846c336c5349e401f Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Oct 2023 23:46:31 +0000 Subject: [PATCH 04/14] verbose --- barretenberg/cpp/CMakePresets.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index d1c0f3e3eeb..bea16ed7fb2 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -197,7 +197,7 @@ "cacheVariables": { "MULTITHREADING": "OFF" }, - "binaryDir": "build-xray-1thread-no-inline" + "binaryDir": "build-xray-1thread-verbose" }, { "name": "xray-1thread", @@ -320,8 +320,8 @@ "targets": ["barretenberg.wasm"] }, { - "name": "xray-1thread-no-inline", - "configurePreset": "xray-1thread-no-inline", + "name": "xray-1thread-verbose", + "configurePreset": "xray-1thread-verbose", "inherits": "default" }, { From c08b335db064d7e30117cafd0bf76267aac2950b Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 00:00:41 +0000 Subject: [PATCH 05/14] Timing --- barretenberg/cpp/CMakePresets.json | 6 +++--- .../src/barretenberg/benchmark/honk_bench/main.simple.cpp | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index bea16ed7fb2..1bab33d1186 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -190,9 +190,9 @@ "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -inline-threshold=500" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150" }, "cacheVariables": { "MULTITHREADING": "OFF" diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index dcb2caec089..175e1607f16 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -43,10 +43,10 @@ template void generate_sha256_test_circuit(Builder& builder, } } -BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(auto& ext_prover) +BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover) { ext_prover.construct_proof(); - for (size_t i = 0; i < 100; i++) { + for (size_t i = 0; i < 1000; i++) { // Bench sumcheck ext_prover.execute_relation_check_rounds(); } @@ -65,6 +65,7 @@ void construct_proof_ultra() noexcept honk::UltraComposer composer; std::shared_ptr instance = composer.create_instance(builder); honk::UltraProver ext_prover = composer.create_prover(instance); + sumcheck_profiling(ext_prover); } int main() From b062c2e93704e4ce7a2d631c938e3e4149fa242a Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 00:02:37 +0000 Subject: [PATCH 06/14] Timing --- .../cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index 175e1607f16..a20b68c9e5f 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -46,7 +46,7 @@ template void generate_sha256_test_circuit(Builder& builder, BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover) { ext_prover.construct_proof(); - for (size_t i = 0; i < 1000; i++) { + for (size_t i = 0; i < 10000; i++) { // Bench sumcheck ext_prover.execute_relation_check_rounds(); } From 6c0d8ae5c1ba466f505581608f4842a58bd75c22 Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 01:24:54 +0000 Subject: [PATCH 07/14] fix: parallelization of sumcheck partially_evaluate --- barretenberg/cpp/CMakePresets.json | 17 +++++++---------- .../cpp/scripts/collect_profile_information.sh | 2 +- .../benchmark/honk_bench/main.simple.cpp | 5 +++-- .../common/parallel_for_mutex_pool.cpp | 2 +- .../src/barretenberg/honk/sumcheck/sumcheck.hpp | 6 ++++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 1bab33d1186..b0bfd929265 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -184,20 +184,17 @@ } }, { - "name": "xray-1thread-verbose", + "name": "xray-verbose", "displayName": "Build with single-threaded XRay Profiling", "description": "Build with Clang and enable single-threaded LLVM XRay for profiling", "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -mllvm -finline-max-stacksize=150" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150" }, - "cacheVariables": { - "MULTITHREADING": "OFF" - }, - "binaryDir": "build-xray-1thread-verbose" + "binaryDir": "build-xray-verbose" }, { "name": "xray-1thread", @@ -320,8 +317,8 @@ "targets": ["barretenberg.wasm"] }, { - "name": "xray-1thread-verbose", - "configurePreset": "xray-1thread-verbose", + "name": "xray-verbose", + "configurePreset": "xray-verbose", "inherits": "default" }, { diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index 1262bb63b82..0b7d79ef8ed 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -41,6 +41,6 @@ llvm-xray-16 stack xray-log.honk_bench_main_simple.* \ --instr_map=./bin/honk_bench_main_simple --stack-format=flame --aggregate-threads --aggregation-type=time --all-stacks \ | node ../scripts/llvm_xray_stack_flame_corrector.js \ | shorten_cpp_names \ - | ../scripts/flamegraph.pl --width 2000 --fontsize 10 \ + | ../scripts/flamegraph.pl --width 1200 --fontsize 10 \ > xray.svg echo "Profiling complete, now you can do e.g. 'scp mainframe:`readlink -f xray.svg` .' on a local terminal and open the SVG in a browser." diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index a20b68c9e5f..b33d5a57097 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -46,7 +46,7 @@ template void generate_sha256_test_circuit(Builder& builder, BBERG_INSTRUMENT BBERG_NOINLINE void sumcheck_profiling(honk::UltraProver& ext_prover) { ext_prover.construct_proof(); - for (size_t i = 0; i < 10000; i++) { + for (size_t i = 0; i < 200; i++) { // Bench sumcheck ext_prover.execute_relation_check_rounds(); } @@ -60,7 +60,8 @@ void construct_proof_ultra() noexcept barretenberg::srs::init_crs_factory("../srs_db/ignition"); // Constuct circuit and prover; don't include this part in measurement honk::UltraComposer::CircuitBuilder builder; - generate_sha256_test_circuit(builder, 100); + generate_sha256_test_circuit(builder, 1); + std::cout << "gates: " << builder.get_total_circuit_size() << std::endl; honk::UltraComposer composer; std::shared_ptr instance = composer.create_instance(builder); diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index c8fb4ebf5ec..beb9ab35e5a 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -50,7 +50,7 @@ class ThreadPool { std::condition_variable complete_condition_; bool stop = false; - void worker_loop(size_t thread_index); + [[clang::xray_never_instrument]] void worker_loop(size_t thread_index); void do_iterations() { diff --git a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp index b8348c819d2..e2f2cb335ac 100644 --- a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp @@ -139,12 +139,14 @@ template class SumcheckProver { void partially_evaluate(auto& polynomials, size_t round_size, FF round_challenge) { // after the first round, operate in place on partially_evaluated_polynomials - for (size_t j = 0; j < polynomials.size(); ++j) { + parallel_for(polynomials.size(), [&](size_t j) { for (size_t i = 0; i < round_size; i += 2) { + auto x = polynomials[j][i]; + std::cout << polynomials[j][i] << std::endl; partially_evaluated_polynomials[j][i >> 1] = polynomials[j][i] + round_challenge * (polynomials[j][i + 1] - polynomials[j][i]); } - } + }); }; }; From 60fd533b7593dd3aa881dc9b36436fb0b74324d7 Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 01:29:29 +0000 Subject: [PATCH 08/14] fix: compiler hints --- .../barretenberg/benchmark/honk_bench/main.simple.cpp | 9 --------- barretenberg/cpp/src/barretenberg/common/inline.hpp | 7 ------- .../src/barretenberg/common/parallel_for_mutex_pool.cpp | 4 +++- .../src/barretenberg/ecc/fields/field_declarations.hpp | 2 +- barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp | 2 +- 5 files changed, 5 insertions(+), 19 deletions(-) delete mode 100644 barretenberg/cpp/src/barretenberg/common/inline.hpp diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp index b33d5a57097..84a2f3c8c88 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/main.simple.cpp @@ -22,15 +22,6 @@ #include "barretenberg/stdlib/primitives/packed_byte_array/packed_byte_array.hpp" #include "barretenberg/stdlib/primitives/witness/witness.hpp" -// TODO(AD): put this into a header -#if defined(__clang__) -#define BBERG_INSTRUMENT [[clang::xray_always_instrument]] -#define BBERG_NOINLINE [[clang::noinline]] -#else -#define BBERG_INSTRUMENT -#define BBERG_NOINLINE -#endif - using namespace proof_system; template void generate_sha256_test_circuit(Builder& builder, size_t num_iterations) diff --git a/barretenberg/cpp/src/barretenberg/common/inline.hpp b/barretenberg/cpp/src/barretenberg/common/inline.hpp deleted file mode 100644 index ee5be8ac78e..00000000000 --- a/barretenberg/cpp/src/barretenberg/common/inline.hpp +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#ifdef _WIN32 -#define BBERG_INLINE __forceinline inline -#else -#define BBERG_INLINE __attribute__((always_inline)) inline -#endif diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index beb9ab35e5a..47e03b5ea85 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -8,6 +8,8 @@ #include #include +#include "barretenberg/common/compiler_hints.hpp" + namespace { class ThreadPool { @@ -50,7 +52,7 @@ class ThreadPool { std::condition_variable complete_condition_; bool stop = false; - [[clang::xray_never_instrument]] void worker_loop(size_t thread_index); + BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index); void do_iterations() { diff --git a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp index 799c202f709..c64b1e35196 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/fields/field_declarations.hpp @@ -1,6 +1,6 @@ #pragma once #include "barretenberg/common/assert.hpp" -#include "barretenberg/common/inline.hpp" +#include "barretenberg/common/compiler_hints.hpp" #include "barretenberg/numeric/random/engine.hpp" #include "barretenberg/numeric/uint128/uint128.hpp" #include "barretenberg/numeric/uint256/uint256.hpp" diff --git a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp index 3eba4298afb..0b2d13761f4 100644 --- a/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp +++ b/barretenberg/cpp/src/barretenberg/ecc/groups/element.hpp @@ -1,7 +1,7 @@ #pragma once #include "affine_element.hpp" -#include "barretenberg/common/inline.hpp" +#include "barretenberg/common/compiler_hints.hpp" #include "barretenberg/common/mem.hpp" #include "barretenberg/numeric/random/engine.hpp" #include "barretenberg/numeric/uint256/uint256.hpp" From cd26dbb6114f216838d4321ec3136ddcf2c1851f Mon Sep 17 00:00:00 2001 From: ludamad Date: Wed, 18 Oct 2023 21:37:45 -0400 Subject: [PATCH 09/14] Update sumcheck.hpp --- barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp index e2f2cb335ac..81af019be73 100644 --- a/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp +++ b/barretenberg/cpp/src/barretenberg/honk/sumcheck/sumcheck.hpp @@ -141,8 +141,6 @@ template class SumcheckProver { // after the first round, operate in place on partially_evaluated_polynomials parallel_for(polynomials.size(), [&](size_t j) { for (size_t i = 0; i < round_size; i += 2) { - auto x = polynomials[j][i]; - std::cout << polynomials[j][i] << std::endl; partially_evaluated_polynomials[j][i >> 1] = polynomials[j][i] + round_challenge * (polynomials[j][i + 1] - polynomials[j][i]); } From 8d5d7dd88a2c6e8a51f386197e0fd93266770fee Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 01:38:06 +0000 Subject: [PATCH 10/14] fix: file --- .../src/barretenberg/common/compiler_hints.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp diff --git a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp new file mode 100644 index 00000000000..c3bdf3cf6b9 --- /dev/null +++ b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp @@ -0,0 +1,18 @@ +#pragma once + +#ifdef _WIN32 +#define BBERG_INLINE __forceinline inline +#else +#define BBERG_INLINE __attribute__((always_inline)) inline +#endif + +// TODO(AD): Other compilers +#if defined(__clang__) +#define BBERG_INSTRUMENT [[clang::xray_always_instrument]] +#define BBERG_NO_INSTRUMENT [[clang::xray_never_instrument]] +#define BBERG_NOINLINE [[clang::noinline]] +#else +#define BBERG_INSTRUMENT +#define BBERG_NO_INSTRUMENT +#define BBERG_NOINLINE +#endif \ No newline at end of file From 70c71489a4114df92a5c6c2607ae7f1ad44e2e3a Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 01:40:48 +0000 Subject: [PATCH 11/14] fix: consistency --- barretenberg/cpp/CMakePresets.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index b0bfd929265..dbb21136d88 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -219,9 +219,9 @@ "generator": "Unix Makefiles", "inherits": "clang16", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=10", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=10", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=10" + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" }, "binaryDir": "build-xray" } From 8fbaa333beb9237aff6081a5219314e1d1ea0530 Mon Sep 17 00:00:00 2001 From: ludamad Date: Thu, 19 Oct 2023 01:42:29 +0000 Subject: [PATCH 12/14] fix: consistency --- barretenberg/cpp/CMakePresets.json | 39 ++++++++++++------------------ 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index dbb21136d88..c54e0b3419e 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -184,11 +184,23 @@ } }, { - "name": "xray-verbose", - "displayName": "Build with single-threaded XRay Profiling", - "description": "Build with Clang and enable single-threaded LLVM XRay for profiling", + "name": "xray", + "displayName": "Build with multi-threaded XRay Profiling", + "description": "Build with Clang and enable multi-threaded LLVM XRay for profiling", "generator": "Unix Makefiles", "inherits": "clang16", + "environment": { + "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" + }, + "binaryDir": "build-xray" + }, + { + "name": "xray-verbose", + "displayName": "Build with detailed XRay Profiling", + "description": "Build with Clang and enable detailed LLVM XRay for profiling", + "inherits": "xray", "environment": { "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", @@ -200,30 +212,11 @@ "name": "xray-1thread", "displayName": "Build with single-threaded XRay Profiling", "description": "Build with Clang and enable single-threaded LLVM XRay for profiling", - "generator": "Unix Makefiles", - "inherits": "clang16", - "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" - }, + "inherits": "xray", "cacheVariables": { "MULTITHREADING": "OFF" }, "binaryDir": "build-xray-1thread" - }, - { - "name": "xray", - "displayName": "Build with multi-threaded XRay Profiling", - "description": "Build with Clang and enable multi-threaded LLVM XRay for profiling", - "generator": "Unix Makefiles", - "inherits": "clang16", - "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100" - }, - "binaryDir": "build-xray" } ], "buildPresets": [ From 8d87c98c12d48643b9de59ba1440c3345db54b43 Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 19 Oct 2023 04:14:43 +0000 Subject: [PATCH 13/14] Update comparison script --- .../benchmark/honk_bench/compare_honk_to_plonk_ultra.sh | 2 +- .../benchmark/honk_bench/ultra_honk.bench.cpp | 8 ++++---- .../benchmark/honk_bench/ultra_plonk.bench.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_to_plonk_ultra.sh b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_to_plonk_ultra.sh index 0e5625e9309..1863327ae4e 100755 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_to_plonk_ultra.sh +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/compare_honk_to_plonk_ultra.sh @@ -6,7 +6,7 @@ echo -e '\nComparing Ultra Plonk/Honk benchmarks.' # Set some directories -BASE_DIR="$HOME/barretenberg/cpp" +BASE_DIR="$HOME/aztec-packages/barretenberg/cpp" BUILD_DIR="$BASE_DIR/build-bench" BENCH_RESULTS_DIR="$BASE_DIR/tmp_bench_results" BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools" diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp index 56c45d24ef0..92933bb4648 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk.bench.cpp @@ -30,22 +30,22 @@ void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuil BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, ecdsa_verification, &bench_utils::generate_ecdsa_verification_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, merkle_membership, &bench_utils::generate_merkle_membership_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); } // namespace ultra_honk_bench \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp index 196245f4ea3..74a9fd1acc7 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_plonk.bench.cpp @@ -26,22 +26,22 @@ void construct_proof_ultra(State& state, void (*test_circuit_function)(UltraBuil BENCHMARK_CAPTURE(construct_proof_ultra, sha256, &bench_utils::generate_sha256_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, keccak, &bench_utils::generate_keccak_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, ecdsa_verification, &bench_utils::generate_ecdsa_verification_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); BENCHMARK_CAPTURE(construct_proof_ultra, merkle_membership, &bench_utils::generate_merkle_membership_test_circuit) ->DenseRange(MIN_NUM_ITERATIONS, MAX_NUM_ITERATIONS) ->Repetitions(NUM_REPETITIONS) - ->Unit(::benchmark::kSecond); + ->Unit(::benchmark::kMillisecond); } // namespace ultra_plonk_bench \ No newline at end of file From a122cca23bf063444a5dee9fc3f52b634ff636e7 Mon Sep 17 00:00:00 2001 From: codygunton Date: Thu, 19 Oct 2023 04:23:52 +0000 Subject: [PATCH 14/14] Fix compare_bench_vs_base in this branch --- .../src/barretenberg/benchmark/compare_branch_vs_baseline.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh b/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh index f0105bb7a34..0ac6dce1157 100755 --- a/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh +++ b/barretenberg/cpp/src/barretenberg/benchmark/compare_branch_vs_baseline.sh @@ -10,7 +10,7 @@ BASELINE_BRANCH="master" echo -e "\nComparing $BENCH_TARGET between $BASELINE_BRANCH and current branch:" # Set some directories -BASE_DIR="$HOME/barretenberg/cpp" +BASE_DIR="$HOME/aztec-packages/barretenberg/cpp" BUILD_DIR="$BASE_DIR/build-bench" # matches build dir specified in bench preset BENCH_RESULTS_DIR="$BASE_DIR/tmp_bench_results" BENCH_TOOLS_DIR="$BUILD_DIR/_deps/benchmark-src/tools"