diff --git a/barretenberg/cpp/CMakePresets.json b/barretenberg/cpp/CMakePresets.json index 9b3dbbe6d80..7927b0ba494 100644 --- a/barretenberg/cpp/CMakePresets.json +++ b/barretenberg/cpp/CMakePresets.json @@ -54,18 +54,6 @@ "DISABLE_ASM": "ON" } }, - { - "name": "tsan", - "displayName": "Debugging build with address sanitizer on Clang-16", - "description": "Build with address sanitizer on clang16 with debugging information", - "inherits": "clang16-dbg", - "binaryDir": "build-tsan", - "environment": { - "CFLAGS": "-fsanitize=thread", - "CXXFLAGS": "-fsanitize=thread", - "LDFLAGS": "-fsanitize=thread" - } - }, { "name": "asan", "displayName": "Debugging build with address sanitizer on Clang-16", @@ -136,6 +124,18 @@ "SMT": "ON" } }, + { + "name": "tsan", + "displayName": "Debugging build with thread sanitizer on Clang-16", + "description": "Build with thread sanitizer on clang16 with debugging information", + "inherits": "clang16-dbg", + "binaryDir": "build-tsan", + "environment": { + "CFLAGS": "-fsanitize=thread", + "CXXFLAGS": "-fsanitize=thread", + "LDFLAGS": "-fsanitize=thread" + } + }, { "name": "coverage", "displayName": "Build with coverage", @@ -203,8 +203,8 @@ "inherits": "clang16", "environment": { "CFLAGS": "-fxray-instrument", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500" + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1" }, "binaryDir": "build-xray" }, @@ -214,9 +214,9 @@ "description": "Build with Clang and enable detailed LLVM XRay for profiling", "inherits": "xray", "environment": { - "CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150", - "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150" + 
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1", + "LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1" }, "binaryDir": "build-xray-verbose" }, @@ -258,11 +258,6 @@ "inherits": "default", "configurePreset": "asan" }, - { - "name": "tsan", - "inherits": "default", - "configurePreset": "tsan" - }, { "name": "gcc", "inherits": "default", @@ -293,6 +288,11 @@ "inherits": "clang16", "configurePreset": "smt-verification" }, + { + "name": "tsan", + "inherits": "default", + "configurePreset": "tsan" + }, { "name": "coverage", "inherits": "default", diff --git a/barretenberg/cpp/scripts/collect_profile_information.sh b/barretenberg/cpp/scripts/collect_profile_information.sh index cd3b7cc1957..28ca73fe4d2 100755 --- a/barretenberg/cpp/scripts/collect_profile_information.sh +++ b/barretenberg/cpp/scripts/collect_profile_information.sh @@ -3,7 +3,7 @@ set -eu PRESET=${1:-xray} # can also be 'xray-1thread' ONLY_PROCESS=${2:-} -EXECUTABLE=${3:-ultra_honk_passes_bench} +EXECUTABLE=${3:-ultra_honk_rounds_bench} # Move above script dir. cd $(dirname $0)/.. 
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt index c4adab0ade9..ed6122bb41e 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/CMakeLists.txt @@ -2,7 +2,7 @@ set(BENCHMARK_SOURCES standard_plonk.bench.cpp ultra_honk.bench.cpp - ultra_honk_passes.bench.cpp + ultra_honk_rounds.bench.cpp ultra_plonk.bench.cpp ) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp similarity index 59% rename from barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp rename to barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp index c150fdb79b3..8b4e7145596 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_passes.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/honk_bench/ultra_honk_rounds.bench.cpp @@ -8,11 +8,19 @@ using namespace benchmark; using namespace proof_system; +// The rounds to measure enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH }; -BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prover, size_t index) noexcept +/** + * @details Benchmark ultrahonk by performing all the rounds, but only measuring one. + * Note: As a result the very short rounds take a long time for statistical significance, so it is recommended to set their + * iterations to 1. + * @param state - The google benchmark state. + * @param prover - The ultrahonk prover. + * @param index - The round to measure. 
+ **/ +BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prover, size_t index) noexcept { - auto time_if_index = [&](size_t target_index, auto&& func) -> void { if (index == target_index) { state.ResumeTiming(); @@ -33,27 +41,28 @@ BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prove state.ResumeTiming(); } } -BBERG_PROFILE static void test_pass(State& state, size_t index) noexcept +BBERG_PROFILE static void test_round(State& state, size_t index) noexcept { barretenberg::srs::init_crs_factory("../srs_db/ignition"); honk::UltraComposer composer; - honk::UltraProver prover = bench_utils::get_prover( - composer, &bench_utils::generate_ecdsa_verification_test_circuit, 10); - test_pass_inner(state, prover, index); + // TODO(AD) benchmark both sparse and dense circuits? + honk::UltraProver prover = + bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit, 1); + test_round_inner(state, prover, index); } -#define PASS_BENCHMARK(pass) \ - static void PASS_##pass(State& state) noexcept \ +#define ROUND_BENCHMARK(round) \ + static void ROUND_##round(State& state) noexcept \ { \ - test_pass(state, pass); \ + test_round(state, round); \ } \ - BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond) + BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond) -// Fast passes take a long time to benchmark because of how we compute statistical significance. +// Fast rounds take a long time to benchmark because of how we compute statistical significance. // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. 
-PASS_BENCHMARK(PREAMBLE)->Iterations(1); -PASS_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); -PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); -PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); -PASS_BENCHMARK(RELATION_CHECK); -PASS_BENCHMARK(ZEROMORPH); \ No newline at end of file +ROUND_BENCHMARK(PREAMBLE)->Iterations(1); +ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); +ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); +ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); +ROUND_BENCHMARK(RELATION_CHECK); +ROUND_BENCHMARK(ZEROMORPH); \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp index 400ddf91d60..1815816a3c4 100644 --- a/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp +++ b/barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp @@ -9,6 +9,8 @@ // TODO(AD): Other instrumentation? #ifdef XRAY #define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]] +#define BBERG_NO_PROFILE [[clang::xray_never_instrument]] #else #define BBERG_PROFILE +#define BBERG_NO_PROFILE #endif \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp index 47e03b5ea85..d3a1afac509 100644 --- a/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp +++ b/barretenberg/cpp/src/barretenberg/common/parallel_for_mutex_pool.cpp @@ -52,7 +52,7 @@ class ThreadPool { std::condition_variable complete_condition_; bool stop = false; - BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index); + BBERG_NO_PROFILE void worker_loop(size_t thread_index); void do_iterations() { diff --git a/barretenberg/cpp/src/barretenberg/common/thread.hpp b/barretenberg/cpp/src/barretenberg/common/thread.hpp index 45f11037c7a..96e3df74092 100644 --- a/barretenberg/cpp/src/barretenberg/common/thread.hpp +++ 
b/barretenberg/cpp/src/barretenberg/common/thread.hpp @@ -23,23 +23,3 @@ inline size_t get_num_cpus_pow2() } void parallel_for(size_t num_iterations, const std::function& func); - -/** - * A modified parallel_for optimized for work being done in batches. - * This is more appropriate for work with small granularity, to avoid thread caching issues and overhead. - */ -inline void parallel_for_batched(size_t num_iterations, auto&& func) -{ - size_t num_threads = get_num_cpus_pow2(); - size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division - // We will use parallel_for to dispatch the batches - parallel_for(num_threads, [&](size_t thread_idx) { - // Calculate start and end for this batch - size_t start = thread_idx * batch_size; - size_t end = std::min(start + batch_size, num_iterations); - - for (size_t i = start; i < end; ++i) { - func(i); - } - }); -} \ No newline at end of file diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index b60f5dd85be..727575b7d56 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -18,6 +18,12 @@ namespace barretenberg { /** * Constructors / Destructors **/ + +/** + * @brief Initialize a Polynomial to size 'initial_size', zeroing memory. + * + * @param initial_size The initial size of the polynomial. + */ template Polynomial::Polynomial(size_t initial_size) : coefficients_(nullptr) @@ -29,6 +35,13 @@ Polynomial::Polynomial(size_t initial_size) memset(static_cast(coefficients_.get()), 0, sizeof(Fr) * capacity()); } +/** + * @brief Initialize a Polynomial to size 'initial_size'. + * Important: This does NOT zero memory. + * + * @param initial_size The initial size of the polynomial. + * @param flag Signals that we do not zero memory. 
+ */ template Polynomial::Polynomial(size_t initial_size, DontZeroMemory flag) : coefficients_(nullptr) @@ -456,18 +469,16 @@ template Polynomial Polynomial::partial_evaluate_mle(std:: } // Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2}) for (size_t l = 1; l < m; ++l) { - size_t new_n_l = 1 << (n - l - 1); - Fr new_u_l = evaluation_points[m - l - 1]; - for (size_t i = 0; i < new_n_l; i++) { - // Iterate on increasingly small portions of intermediate results. - intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]); + n_l = 1 << (n - l - 1); + u_l = evaluation_points[m - l - 1]; + for (size_t i = 0; i < n_l; ++i) { + intermediate[i] += u_l * (intermediate[i + n_l] - intermediate[i]); } } - size_t final_n_l = 1 << (n - m); // Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer - Polynomial result(final_n_l, DontZeroMemory::FLAG); - for (size_t idx = 0; idx < final_n_l; ++idx) { + Polynomial result(n_l, DontZeroMemory::FLAG); + for (size_t idx = 0; idx < n_l; ++idx) { result[idx] = intermediate[idx]; }