Skip to content

Commit

Permalink
review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
ludamad0 committed Oct 23, 2023
1 parent 799203d commit cd42d16
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 70 deletions.
44 changes: 22 additions & 22 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,6 @@
"DISABLE_ASM": "ON"
}
},
{
"name": "tsan",
"displayName": "Debugging build with address sanitizer on Clang-16",
"description": "Build with address sanitizer on clang16 with debugging information",
"inherits": "clang16-dbg",
"binaryDir": "build-tsan",
"environment": {
"CFLAGS": "-fsanitize=thread",
"CXXFLAGS": "-fsanitize=thread",
"LDFLAGS": "-fsanitize=thread"
}
},
{
"name": "asan",
"displayName": "Debugging build with address sanitizer on Clang-16",
Expand Down Expand Up @@ -136,6 +124,18 @@
"SMT": "ON"
}
},
{
"name": "tsan",
"displayName": "Debugging build with thread sanitizer on Clang-16",
"description": "Build with thread sanitizer on clang16 with debugging information",
"inherits": "clang16-dbg",
"binaryDir": "build-tsan",
"environment": {
"CFLAGS": "-fsanitize=thread",
"CXXFLAGS": "-fsanitize=thread",
"LDFLAGS": "-fsanitize=thread"
}
},
{
"name": "coverage",
"displayName": "Build with coverage",
Expand Down Expand Up @@ -203,8 +203,8 @@
"inherits": "clang16",
"environment": {
"CFLAGS": "-fxray-instrument",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500"
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=500 -DXRAY=1"
},
"binaryDir": "build-xray"
},
Expand All @@ -214,9 +214,9 @@
"description": "Build with Clang and enable detailed LLVM XRay for profiling",
"inherits": "xray",
"environment": {
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150"
"CFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1",
"CXXFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1",
"LDFLAGS": "-fxray-instrument -fxray-instruction-threshold=100 -finline-max-stacksize=150 -DXRAY=1"
},
"binaryDir": "build-xray-verbose"
},
Expand Down Expand Up @@ -258,11 +258,6 @@
"inherits": "default",
"configurePreset": "asan"
},
{
"name": "tsan",
"inherits": "default",
"configurePreset": "tsan"
},
{
"name": "gcc",
"inherits": "default",
Expand Down Expand Up @@ -293,6 +288,11 @@
"inherits": "clang16",
"configurePreset": "smt-verification"
},
{
"name": "tsan",
"inherits": "default",
"configurePreset": "tsan"
},
{
"name": "coverage",
"inherits": "default",
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/scripts/collect_profile_information.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -eu

PRESET=${1:-xray} # can also be 'xray-1thread'
ONLY_PROCESS=${2:-}
EXECUTABLE=${3:-ultra_honk_passes_bench}
EXECUTABLE=${3:-ultra_honk_rounds_bench}

# Move above script dir.
cd $(dirname $0)/..
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set(BENCHMARK_SOURCES
standard_plonk.bench.cpp
ultra_honk.bench.cpp
ultra_honk_passes.bench.cpp
ultra_honk_rounds.bench.cpp
ultra_plonk.bench.cpp
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,19 @@
using namespace benchmark;
using namespace proof_system;

// The rounds to measure
enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH };

BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prover, size_t index) noexcept
/**
* @details Benchmark ultrahonk by performing all the rounds, but only measuring one.
* Note: As a result the very short rounds take a long time for statistical significance, so recommended to set their
* iterations to 1.
* @param state - The google benchmark state.
* @param prover - The ultrahonk prover.
* @param index - The pass to measure.
**/
BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prover, size_t index) noexcept
{

auto time_if_index = [&](size_t target_index, auto&& func) -> void {
if (index == target_index) {
state.ResumeTiming();
Expand All @@ -33,27 +41,28 @@ BBERG_PROFILE static void test_pass_inner(State& state, honk::UltraProver& prove
state.ResumeTiming();
}
}
BBERG_PROFILE static void test_pass(State& state, size_t index) noexcept
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
{
barretenberg::srs::init_crs_factory("../srs_db/ignition");

honk::UltraComposer composer;
honk::UltraProver prover = bench_utils::get_prover(
composer, &bench_utils::generate_ecdsa_verification_test_circuit<UltraCircuitBuilder>, 10);
test_pass_inner(state, prover, index);
// TODO(AD) benchmark both sparse and dense circuits?
honk::UltraProver prover =
bench_utils::get_prover(composer, &bench_utils::generate_keccak_test_circuit<UltraCircuitBuilder>, 1);
test_round_inner(state, prover, index);
}
#define PASS_BENCHMARK(pass) \
static void PASS_##pass(State& state) noexcept \
#define ROUND_BENCHMARK(round) \
static void ROUND_##round(State& state) noexcept \
{ \
test_pass(state, pass); \
test_round(state, round); \
} \
BENCHMARK(PASS_##pass)->Unit(::benchmark::kMillisecond)
BENCHMARK(ROUND_##round)->Unit(::benchmark::kMillisecond)

// Fast passes take a long time to benchmark because of how we compute statistical significance.
// Fast rounds take a long time to benchmark because of how we compute statistical significance.
// Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part.
PASS_BENCHMARK(PREAMBLE)->Iterations(1);
PASS_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
PASS_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
PASS_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
PASS_BENCHMARK(RELATION_CHECK);
PASS_BENCHMARK(ZEROMORPH);
ROUND_BENCHMARK(PREAMBLE)->Iterations(1);
ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1);
ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1);
ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1);
ROUND_BENCHMARK(RELATION_CHECK);
ROUND_BENCHMARK(ZEROMORPH);
2 changes: 2 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// TODO(AD): Other instrumentation?
#ifdef XRAY
#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
#define BBERG_NO_PROFILE [[clang::xray_never_instrument]]
#else
#define BBERG_PROFILE
#define BBERG_NO_PROFILE
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ThreadPool {
std::condition_variable complete_condition_;
bool stop = false;

BBERG_NO_INSTRUMENT void worker_loop(size_t thread_index);
BBERG_NO_PROFILE void worker_loop(size_t thread_index);

void do_iterations()
{
Expand Down
20 changes: 0 additions & 20 deletions barretenberg/cpp/src/barretenberg/common/thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,3 @@ inline size_t get_num_cpus_pow2()
}

void parallel_for(size_t num_iterations, const std::function<void(size_t)>& func);

/**
* A modified parallel_for optimized for work being done in batches.
* This is more appropriate for work with small granularity, to avoid thread caching issues and overhead.
*/
inline void parallel_for_batched(size_t num_iterations, auto&& func)
{
size_t num_threads = get_num_cpus_pow2();
size_t batch_size = (num_iterations + num_threads - 1) / num_threads; // round up division
// We will use parallel_for to dispatch the batches
parallel_for(num_threads, [&](size_t thread_idx) {
// Calculate start and end for this batch
size_t start = thread_idx * batch_size;
size_t end = std::min(start + batch_size, num_iterations);

for (size_t i = start; i < end; ++i) {
func(i);
}
});
}
27 changes: 19 additions & 8 deletions barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ namespace barretenberg {
/**
* Constructors / Destructors
**/

/**
* @brief Initialize a Polynomial to size 'initial_size', zeroing memory.
*
* @param initial_size The initial size of the polynomial.
*/
template <typename Fr>
Polynomial<Fr>::Polynomial(size_t initial_size)
: coefficients_(nullptr)
Expand All @@ -29,6 +35,13 @@ Polynomial<Fr>::Polynomial(size_t initial_size)
memset(static_cast<void*>(coefficients_.get()), 0, sizeof(Fr) * capacity());
}

/**
* @brief Initialize a Polynomial to size 'initial_size'.
* Important: This does NOT zero memory.
*
* @param initial_size The initial size of the polynomial.
* @param flag Signals that we do not zero memory.
*/
template <typename Fr>
Polynomial<Fr>::Polynomial(size_t initial_size, DontZeroMemory flag)
: coefficients_(nullptr)
Expand Down Expand Up @@ -456,18 +469,16 @@ template <typename Fr> Polynomial<Fr> Polynomial<Fr>::partial_evaluate_mle(std::
}
// Evaluate m-1 variables X_{n-l-1}, ..., X_{n-2} at m-1 remaining values u_0,...,u_{m-2})
for (size_t l = 1; l < m; ++l) {
size_t new_n_l = 1 << (n - l - 1);
Fr new_u_l = evaluation_points[m - l - 1];
for (size_t i = 0; i < new_n_l; i++) {
// Iterate on increasingly small portions of intermediate results.
intermediate[i] += new_u_l * (intermediate[i + new_n_l] - intermediate[i]);
n_l = 1 << (n - l - 1);
u_l = evaluation_points[m - l - 1];
for (size_t i = 0; i < n_l; ++i) {
intermediate[i] += u_l * (intermediate[i + n_l] - intermediate[i]);
}
}

size_t final_n_l = 1 << (n - m);
// Construct resulting polynomial g(X_0,…,X_{n-m-1})) = p(X_0,…,X_{n-m-1},u_0,...u_{m-1}) from buffer
Polynomial<Fr> result(final_n_l, DontZeroMemory::FLAG);
for (size_t idx = 0; idx < final_n_l; ++idx) {
Polynomial<Fr> result(n_l, DontZeroMemory::FLAG);
for (size_t idx = 0; idx < n_l; ++idx) {
result[idx] = intermediate[idx];
}

Expand Down

0 comments on commit cd42d16

Please sign in to comment.