Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bb): op counting mode #4437

Merged
merged 14 commits into from
Feb 6, 2024
15 changes: 15 additions & 0 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,16 @@
"LDFLAGS": "-fsanitize=memory"
}
},
{
"name": "op-counting",
"displayName": "Release build with operation counts for benchmarks",
"description": "Build with op counting",
"inherits": "clang16",
"binaryDir": "build-op-counting",
"environment": {
"CXXFLAGS": "-DBB_USE_OP_COUNT"
}
},
{
"name": "coverage",
"displayName": "Build with coverage",
Expand Down Expand Up @@ -300,6 +310,11 @@
"inherits": "default",
"configurePreset": "clang16"
},
{
"name": "op-counting",
"inherits": "default",
"configurePreset": "op-counting"
},
{
"name": "clang16-dbg",
"inherits": "default",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* sequential_copy: 3.3
*
*/
#include "barretenberg/common/op_count.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/ecc/curves/bn254/bn254.hpp"
#include <benchmark/benchmark.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <benchmark/benchmark.h>

#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
#include "barretenberg/common/op_count_google_bench.hpp"
#include "barretenberg/goblin/goblin.hpp"
#include "barretenberg/goblin/mock_circuits.hpp"
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
Expand Down Expand Up @@ -67,6 +68,7 @@ BENCHMARK_DEFINE_F(GoblinBench, GoblinFull)(benchmark::State& state)
GoblinMockCircuits::perform_op_queue_interactions_for_mock_first_circuit(goblin.op_queue);

for (auto _ : state) {
BB_REPORT_OP_COUNT_IN_BENCH(state);
// Perform a specified number of iterations of function/kernel accumulation
perform_goblin_accumulation_rounds(state, goblin);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <benchmark/benchmark.h>

#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
#include "barretenberg/common/op_count_google_bench.hpp"
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
#include "barretenberg/ultra_honk/ultra_composer.hpp"
#include "barretenberg/ultra_honk/ultra_prover.hpp"
Expand All @@ -27,15 +28,20 @@ enum {
* @param prover - The ultrahonk prover.
* @param index - The pass to measure.
**/
BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
BB_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
{
auto time_if_index = [&](size_t target_index, auto&& func) -> void {
BB_REPORT_OP_COUNT_IN_BENCH(state);
if (index == target_index) {
state.ResumeTiming();
}

func();
if (index == target_index) {
state.PauseTiming();
} else {
// We don't actually want to write to user-defined counters
BB_REPORT_OP_COUNT_BENCH_CANCEL();
}
};

Expand All @@ -47,7 +53,7 @@ BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, si
time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
}
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
BB_PROFILE static void test_round(State& state, size_t index) noexcept
{
bb::srs::init_crs_factory("../srs_db/ignition");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ enum {
SIXTH_BATCH_OPEN
};

BBERG_PROFILE static void plonk_round(
BB_PROFILE static void plonk_round(
State& state, plonk::UltraProver& prover, size_t target_index, size_t index, auto&& func) noexcept
{
if (index == target_index) {
Expand All @@ -37,7 +37,7 @@ BBERG_PROFILE static void plonk_round(
* @param prover - The ultraplonk prover.
* @param index - The pass to measure.
**/
BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
BB_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
{
plonk_round(state, prover, PREAMBLE, index, [&] { prover.execute_preamble_round(); });
plonk_round(state, prover, FIRST_WIRE_COMMITMENTS, index, [&] { prover.execute_first_round(); });
Expand All @@ -47,7 +47,7 @@ BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& pro
plonk_round(state, prover, FIFTH_COMPUTE_QUOTIENT_EVALUTION, index, [&] { prover.execute_fifth_round(); });
plonk_round(state, prover, SIXTH_BATCH_OPEN, index, [&] { prover.execute_sixth_round(); });
}
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
BB_PROFILE static void test_round(State& state, size_t index) noexcept
{
bb::srs::init_crs_factory("../srs_db/ignition");
for (auto _ : state) {
Expand Down
22 changes: 16 additions & 6 deletions barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
#pragma once

#ifdef _WIN32
#define BBERG_INLINE __forceinline inline
#define BB_INLINE __forceinline inline
#else
#define BBERG_INLINE __attribute__((always_inline)) inline
#define BB_INLINE __attribute__((always_inline)) inline
#endif

// TODO(AD): Other instrumentation?
#ifdef XRAY
#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
#define BBERG_NO_PROFILE [[clang::xray_never_instrument]]
#define BB_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
#define BB_NO_PROFILE [[clang::xray_never_instrument]]
#else
#define BBERG_PROFILE
#define BBERG_NO_PROFILE
#define BB_PROFILE
#define BB_NO_PROFILE
#endif

// Branch-prediction hints: state which outcome of a condition is expected.
// Under clang they map to __builtin_expect; on other compilers they only evaluate the condition.
// Every expansion normalizes its argument with !!(x) and is a single parenthesized/call expression,
// so the result is 0 or 1 on all compilers and composes safely with surrounding operators
// (e.g. `!BB_UNLIKELY(a && b)`).
#ifdef __clang__
#define BB_LIKELY(x) __builtin_expect(!!(x), 1)
#define BB_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define BB_LIKELY(x) (!!(x))
#define BB_UNLIKELY(x) (!!(x))
#endif
51 changes: 51 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

#include <cstddef>
#ifdef BB_USE_OP_COUNT
#include "op_count.hpp"
#include <iostream>
#include <sstream>
#include <thread>

namespace bb::detail {
void GlobalOpCountContainer::add_entry(const char* key, std::size_t* count)
{
std::unique_lock<std::mutex> lock(mutex);
std::stringstream ss;
ss << std::this_thread::get_id();
counts.push_back({ key, ss.str(), count });
}

/**
 * @brief Write every non-zero counter to stdout, one line per registered entry.
 *
 * Output is framed by START/END marker lines. Each entry line is tab-separated: label, count,
 * and the id of the thread that registered the counter.
 *
 * NOTE(review): `counts` is read here without taking `mutex` - presumably callers only print
 * while no other thread is registering counters; confirm.
 */
void GlobalOpCountContainer::print() const
{
    std::cout << "print_op_counts() START" << std::endl;
    for (const Entry& registered : counts) {
        const std::size_t ops = *registered.count;
        // Counters that were never hit (or were reset) are not printed.
        if (ops == 0) {
            continue;
        }
        std::cout << registered.key << "\t" << ops << "\t[thread=" << registered.thread_id << "]" << std::endl;
    }
    std::cout << "print_op_counts() END" << std::endl;
}

/**
 * @brief Sum the registered counters per operation label.
 *
 * Entries sharing a label (e.g. the same operation counted on several threads) are added
 * together. Labels whose counters are all zero do not appear in the result.
 *
 * @return Map from operation label to its total count.
 *
 * NOTE(review): `counts` is read here without taking `mutex` - presumably this is only called
 * while no other thread is registering counters; confirm.
 */
std::map<std::string, std::size_t> GlobalOpCountContainer::get_aggregate_counts() const
{
    std::map<std::string, std::size_t> totals;
    for (const Entry& registered : counts) {
        const std::size_t ops = *registered.count;
        // Skip zero counters so that no empty map slot is created for them.
        if (ops != 0) {
            totals[registered.key] += ops;
        }
    }
    return totals;
}

void GlobalOpCountContainer::clear()
{
std::unique_lock<std::mutex> lock(mutex);
for (Entry& entry : counts) {
*entry.count = 0;
}
}

// Definition of the global counter registry in namespace bb::detail.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
GlobalOpCountContainer GLOBAL_OP_COUNTS;
} // namespace bb::detail
#endif
87 changes: 87 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@

#pragma once

#ifndef BB_USE_OP_COUNT
// Expand to a no-op expression so that call sites still need a trailing semicolon (keeps formatters happy)
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() (void)0
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) (void)0
#else
/**
 * Provides an abstraction that counts operations, keyed by a label (by default the enclosing function's name).
 * For efficiency, each thread increments its own counter; the per-thread counts are only combined on request.
 */

#include "barretenberg/common/compiler_hints.hpp"
#include <algorithm>
#include <atomic>
#include <cstdlib>
#include <map>
#include <mutex>
#include <string>
#include <vector>
namespace bb::detail {
// Compile-time string: a fixed-size character buffer holding a copy of a string literal
// (including its terminating '\0'), so that a string can be used as a template argument.
// See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/
template <std::size_t N> struct OperationLabel {
    // Copies all N characters of `str` into `value`.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    constexpr OperationLabel(const char (&str)[N])
    {
        std::size_t pos = 0;
        while (pos < N) {
            value[pos] = str[pos];
            ++pos;
        }
    }

    // The copied characters.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    char value[N];
};

// Registry of every op counter that has been registered so far (one entry per label per thread).
struct GlobalOpCountContainer {
    // One registered counter.
    struct Entry {
        std::string key;       // operation label
        std::string thread_id; // textual id of the thread that registered the counter
        std::size_t* count;    // address of the counter itself
    };

    // Guards modification of `counts`.
    std::mutex mutex;
    std::vector<Entry> counts;

    // Registers the counter at `count` under the label `key`.
    void add_entry(const char* key, std::size_t* count);
    // Resets all registered counters to zero.
    // NOTE: Should be called when other threads aren't active
    void clear();
    // Prints every non-zero counter.
    void print() const;
    // Returns the per-label totals over all registered counters.
    std::map<std::string, std::size_t> get_aggregate_counts() const;
};

// The single global registry instance.
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
extern GlobalOpCountContainer GLOBAL_OP_COUNTS;

/**
 * @brief Counter for the operation labelled `Op`, with a separate count per thread.
 *
 * @tparam Op Label of the counted operation; each distinct label gets its own counter.
 */
template <OperationLabel Op> struct GlobalOpCount {
    // This thread's counter for `Op`; stays null until the thread's first increment.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
    static thread_local std::size_t* thread_local_count;

    /**
     * @brief Add one to the calling thread's count for `Op`.
     *
     * Does nothing when invoked during constant evaluation.
     */
    static constexpr void increment_op_count()
    {
        // Counting only happens at runtime; if the compiler evaluates this, it is a no-op.
        if (!std::is_constant_evaluated()) {
            if (BB_UNLIKELY(thread_local_count == nullptr)) {
                // First increment on this thread: allocate the counter and hand its address to the
                // global registry. The counter is not freed here; the registry keeps the raw pointer.
                thread_local_count = new std::size_t();
                GLOBAL_OP_COUNTS.add_entry(Op.value, thread_local_count);
            }
            ++(*thread_local_count);
        }
    }
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
template <OperationLabel Op> thread_local std::size_t* GlobalOpCount<Op>::thread_local_count;

} // namespace bb::detail

// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() bb::detail::GlobalOpCount<__func__>::increment_op_count()
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) bb::detail::GlobalOpCount<name>::increment_op_count()
#endif
50 changes: 50 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count_google_bench.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

#pragma once
#include <benchmark/benchmark.h>

#ifndef BB_USE_OP_COUNT
namespace bb {
struct GoogleBenchOpCountReporter {
GoogleBenchOpCountReporter(::benchmark::State& state)
{
// unused, we don't have op counts on
(void)state;
}
};
}; // namespace bb
// require a semicolon to appease formatters
#define BB_REPORT_OP_COUNT_IN_BENCH(state) (void)0
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() (void)0
#else
#include "op_count.hpp"
namespace bb {
/**
 * @brief Scope guard that publishes op counts as google-benchmark user-defined counters.
 *
 * On construction all registered op counters are reset; on destruction the per-label totals
 * are written into `state.counters`, unless `cancelled` has been set in the meantime.
 */
// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
struct GoogleBenchOpCountReporter {
    // We allow having a ref member as this only lives inside a function frame
    ::benchmark::State& state;
    // Set to true to suppress reporting when the guard is destroyed.
    bool cancelled = false;

    GoogleBenchOpCountReporter(::benchmark::State& state)
        : state(state)
    {
        // Intent: Clear when we enter the state loop
        bb::detail::GLOBAL_OP_COUNTS.clear();
    }

    ~GoogleBenchOpCountReporter()
    {
        // Allow for conditional reporting
        if (cancelled) {
            return;
        }
        // Intent: Collect results when we exit the state loop
        for (const auto& [label, total] : bb::detail::GLOBAL_OP_COUNTS.get_aggregate_counts()) {
            state.counters[label] = static_cast<double>(total);
        }
    }
};
} // namespace bb

// Allow for integration with google benchmark user-defined counters.
// The type is fully qualified so the macro works from any namespace, and neither macro ends in a
// semicolon: like the no-op variants, they expand to a single statement body and the call site
// supplies the `;`.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_REPORT_OP_COUNT_IN_BENCH(state) ::bb::GoogleBenchOpCountReporter __bb_report_op_count_in_bench { state }
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() __bb_report_op_count_in_bench.cancelled = true
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ThreadPool {
std::condition_variable complete_condition_;
bool stop = false;

BBERG_NO_PROFILE void worker_loop(size_t thread_index);
BB_NO_PROFILE void worker_loop(size_t thread_index);

void do_iterations()
{
Expand Down
Loading
Loading