Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Benchmarks for basic functionality and IPA improvements #4004

Merged
merged 6 commits into from
Jan 13, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
wip
  • Loading branch information
Rumata888 committed Jan 12, 2024
commit 9f44311a2e77db731470601943d28c8a3f1331b2
4 changes: 3 additions & 1 deletion barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
# Benchmark suites. Each subdirectory is listed exactly once: CMake reports an
# error if the same source directory is added twice with the same binary directory.
add_subdirectory(decrypt_bench)
add_subdirectory(ipa_bench)
add_subdirectory(pippenger_bench)
add_subdirectory(plonk_bench)
add_subdirectory(ultra_bench)
add_subdirectory(goblin_bench)
add_subdirectory(honk_bench)
add_subdirectory(parallel_bench)
add_subdirectory(relations_bench)
add_subdirectory(widgets_bench)
add_subdirectory(protogalaxy_bench)
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Each source file listed here is built into its own benchmark executable.
set(BENCHMARK_SOURCES
    parallel.bench.cpp
)

# Libraries that every benchmark executable in this directory links against.
set(LINKED_LIBRARIES
    benchmark::benchmark
    ecc
)

# For each source, add the executable <name>_bench (e.g. parallel_bench) and a
# convenience target run_<name> that runs it from the top-level build directory.
foreach(BENCHMARK_SOURCE ${BENCHMARK_SOURCES})
    get_filename_component(BENCHMARK_NAME ${BENCHMARK_SOURCE} NAME_WE) # file name without directory or longest extension
    add_executable(${BENCHMARK_NAME}_bench ${BENCHMARK_SOURCE})
    target_link_libraries(${BENCHMARK_NAME}_bench ${LINKED_LIBRARIES})
    # COMMAND has to name the executable target declared above (<name>_bench); the bare
    # <name> is not a target. Naming the target lets CMake substitute the path of the
    # built binary and adds a build dependency on it.
    add_custom_target(run_${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME}_bench WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
endforeach()
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/python3
"""Fit a straight line to the real_time column of a Google Benchmark CSV report.

Usage: <script> <benchmark_output.csv>

The input file is rewritten in place so that it starts at the CSV header (any
text before the header is dropped). Each benchmark name is expected to look
like ``name/N``; the fit uses x = 2**N and y = real_time, and prints the slope
and the intercept of the least-squares line y = m * x + c.
"""
import sys

import numpy as np

# Header row that marks the start of the CSV table in the benchmark output.
CSV_HEADER = "name,iterations,real_time,cpu_time,time_unit,bytes_per_second,items_per_second,label,error_occurred,error_message"


def _parse_exponent(line):
    """Return the integer N from a data row whose first field is ``"name/N"``."""
    name = line.replace('"', '').split(',')[0]
    parts = name.split('/')
    if len(parts) < 2:
        raise ValueError("benchmark name %r has no '/<exponent>' suffix" % name)
    return int(parts[1])


def fit_benchmark_csv(filename):
    """Strip the preamble from ``filename`` and fit real_time against 2**N.

    Returns the pair (m, c) of the least-squares line real_time = m * 2**N + c.
    Raises ValueError, leaving the file untouched, when the CSV header or the
    data rows are missing or a benchmark name carries no exponent.
    """
    kept_lines = []
    x_exponents = []
    header_found = False
    with open(filename) as f:
        for line in f:
            if CSV_HEADER in line:
                header_found = True
                kept_lines.append(line)
                continue
            # Skip everything before the header, and blank lines after it.
            if not header_found or not line.strip():
                continue
            x_exponents.append(_parse_exponent(line))
            kept_lines.append(line)

    # Validate before rewriting: overwriting first would truncate a file that
    # does not contain the expected table.
    if not header_found:
        raise ValueError("CSV header not found in %s" % filename)
    if not x_exponents:
        raise ValueError("no benchmark rows found in %s" % filename)

    with open(filename, "w") as f:
        f.writelines(kept_lines)

    data = np.genfromtxt(filename, delimiter=",", usemask=True)
    # Row 0 is the header; column 2 is real_time.
    y = np.asarray(np.transpose(data[1:])[2], dtype=float)
    x = np.array([1 << i for i in x_exponents])
    A = np.vstack([x, np.ones(len(x))]).T
    m, c = np.linalg.lstsq(A, y, rcond=None)[0]
    return m, c


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) != 2:
        print("usage: %s <benchmark_output.csv>" % argv[0], file=sys.stderr)
        return 1
    m, c = fit_benchmark_csv(argv[1])
    print(m, c)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
#include "barretenberg/common/thread.hpp"
#include "barretenberg/ecc/curves/bn254/bn254.hpp"
#include <benchmark/benchmark.h>

using namespace benchmark;
using namespace barretenberg;
namespace {
// Curve whose scalar field and group elements are benchmarked below.
using Curve = curve::BN254;
// Scalar field of the curve; operand type of the ff_* benchmarks.
using Fr = Curve::ScalarField;
// log2 of the number of additions each parallel_for index performs per benchmark iteration in
// parallel_for_field_element_addition; also the largest range value that benchmark is registered with.
#define MAX_REPETITION_LOG 12

/**
 * @brief Benchmark for evaluating the cost of starting parallel_for
 *
 * @details Each benchmark iteration launches parallel_for 2^range(0) times, and every launch performs
 * 2^(MAX_REPETITION_LOG - range(0)) field additions per index. The number of additions per index is therefore the
 * same (2^MAX_REPETITION_LOG) for every range value and only the number of launches varies. The original author
 * reported that parallel_for takes ~400 microseconds to start.
 * @param state Google Benchmark state; range(0) is log2 of the number of parallel_for launches
 */
void parallel_for_field_element_addition(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    const size_t num_cpus = get_num_cpus();
    // One private pair of operands per parallel_for index; only elements 0 and 1 are ever accessed.
    std::vector<std::vector<Fr>> copy_vector(num_cpus);
    for (auto& operands : copy_vector) {
        operands.reserve(2);
        operands.emplace_back(Fr::random_element(&engine));
        operands.emplace_back(Fr::random_element(&engine));
    }
    // range(0) does not change during a run, so the loop bounds are computed once up front instead of inside a
    // PauseTiming/ResumeTiming pair on every iteration. The shifts are done in size_t rather than int.
    const size_t launch_log = static_cast<size_t>(state.range(0));
    const size_t num_external_cycles = static_cast<size_t>(1) << launch_log;
    const size_t num_internal_cycles = static_cast<size_t>(1) << (MAX_REPETITION_LOG - launch_log);
    for (auto _ : state) {
        for (size_t launch = 0; launch < num_external_cycles; launch++) {
            parallel_for(num_cpus, [num_internal_cycles, &copy_vector](size_t index) {
                // Alternate the destination between the two operands so each addition depends on the previous one.
                for (size_t i = 0; i < num_internal_cycles; i++) {
                    copy_vector[index][i & 1] += copy_vector[index][1 - (i & 1)];
                }
            });
        }
    }
}

/**
 * @brief Evaluate how much finite field addition costs (in cache)
 *
 * @details Performs 2^range(0) dependent additions per benchmark iteration on two random field elements. The
 * original author reported ~4 ns per addition after subtracting the loop overhead; that figure predates the operand
 * initialisation fix below and should be re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of additions per iteration
 */
void ff_addition(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Start from an empty vector. Constructing it with size 2 and then calling emplace_back would leave two
    // default-constructed elements at indices 0 and 1 (the only ones the loop uses) and put the random ones behind them.
    std::vector<Fr> copy_vector;
    copy_vector.reserve(2);
    copy_vector.emplace_back(Fr::random_element(&engine));
    copy_vector.emplace_back(Fr::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[i & 1] += copy_vector[1 - (i & 1)];
        }
    }
}

/**
 * @brief Evaluate how much finite field multiplication costs (in cache)
 *
 * @details Performs 2^range(0) dependent multiplications per benchmark iteration on two random field elements. The
 * original author reported ~25 ns per multiplication after subtracting the loop overhead; that figure predates the
 * operand initialisation fix below and should be re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of multiplications per iteration
 */
void ff_multiplication(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Start from an empty vector. Constructing it with size 2 and then calling emplace_back would leave two
    // default-constructed elements at indices 0 and 1 (the only ones the loop uses) and put the random ones behind them.
    std::vector<Fr> copy_vector;
    copy_vector.reserve(2);
    copy_vector.emplace_back(Fr::random_element(&engine));
    copy_vector.emplace_back(Fr::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[i & 1] *= copy_vector[1 - (i & 1)];
        }
    }
}

/**
 * @brief Evaluate how much finite field squaring costs (in cache)
 *
 * @details Squares a single random field element 2^range(0) times per benchmark iteration, feeding each result into
 * the next squaring. The original author reported ~19 ns per squaring after subtracting the loop overhead; that
 * figure predates the operand initialisation fix below and should be re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of squarings per iteration
 */
void ff_sqr(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Only element 0 is used. Start from an empty vector so that element 0 is the random value rather than a
    // default-constructed one (which is what a size-2 vector followed by emplace_back would give).
    std::vector<Fr> copy_vector;
    copy_vector.reserve(1);
    copy_vector.emplace_back(Fr::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[0] = copy_vector[0].sqr();
        }
    }
}

/**
 * @brief Evaluate how much projective point addition costs (in cache)
 *
 * @details Performs 2^range(0) dependent additions per benchmark iteration on two random curve elements. The
 * original author reported ~350 ns per addition after subtracting the loop overhead; that figure predates the
 * operand initialisation fix below and should be re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of additions per iteration
 */
void projective_point_addition(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Start from an empty vector. Constructing it with size 2 and then calling emplace_back would leave two
    // default-constructed elements at indices 0 and 1 (the only ones the loop uses) and put the random ones behind them.
    std::vector<Curve::Element> copy_vector;
    copy_vector.reserve(2);
    copy_vector.emplace_back(Curve::Element::random_element(&engine));
    copy_vector.emplace_back(Curve::Element::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[i & 1] += copy_vector[1 - (i & 1)];
        }
    }
}

/**
 * @brief Evaluate how much projective point doubling costs when we trigger it through addition (in cache)
 *
 * @details Adds a random curve element to itself 2^range(0) times per benchmark iteration using operator+=, so the
 * addition routine is always called with two equal operands. The original author reported ~354 ns per operation
 * after subtracting the loop overhead; that figure predates the operand initialisation fix below and should be
 * re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of additions per iteration
 */
void projective_point_accidental_doubling(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Only element 0 is used. Start from an empty vector so that element 0 is the random point rather than a
    // default-constructed one (which is what a size-2 vector followed by emplace_back would give).
    std::vector<Curve::Element> copy_vector;
    copy_vector.reserve(1);
    copy_vector.emplace_back(Curve::Element::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[0] += copy_vector[0];
        }
    }
}

/**
 * @brief Evaluate how much projective point doubling costs (in cache)
 *
 * @details Calls dbl() on a single random curve element 2^range(0) times per benchmark iteration, feeding each
 * result into the next doubling. The original author reported ~195 ns per doubling after subtracting the loop
 * overhead; that figure predates the operand initialisation fix below and should be re-measured.
 * @param state Google Benchmark state; range(0) is log2 of the number of doublings per iteration
 */
void projective_point_doubling(State& state)
{
    numeric::random::Engine& engine = numeric::random::get_debug_engine();
    // Only element 0 is used. Start from an empty vector so that element 0 is the random point rather than a
    // default-constructed one (which is what a size-2 vector followed by emplace_back would give).
    std::vector<Curve::Element> copy_vector;
    copy_vector.reserve(1);
    copy_vector.emplace_back(Curve::Element::random_element(&engine));

    // range(0) is constant for a run, so the bound is computed once, outside the timed loop, and in size_t.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        for (size_t i = 0; i < num_cycles; i++) {
            copy_vector[0] = copy_vector[0].dbl();
        }
    }
}
/**
 * @brief Evaluate how much running the loop costs in benchmarks
 *
 * @details Runs an otherwise empty counting loop of 2^range(0) steps per benchmark iteration. The original author
 * reported 0.6~0.7 ns per cycle.
 * @param state Google Benchmark state; range(0) is log2 of the number of loop steps per iteration
 */
void cycle_waste(State& state)
{
    // range(0) is constant for a run, so the bound is computed once, outside the timed loop. The shift is done in
    // size_t: the registered range goes up to 30, one step below where an int shift would overflow.
    const size_t num_cycles = static_cast<size_t>(1) << static_cast<size_t>(state.range(0));
    for (auto _ : state) {
        // The counter is volatile so the compiler has to perform every increment instead of removing the loop.
        for (volatile size_t i = 0; i < num_cycles;) {
            i = i + 1;
        }
    }
}
} // namespace

// Benchmark registration. DenseRange(lo, hi) runs the benchmark once for every integer in [lo, hi] and passes it as
// state.range(0); every benchmark in this file uses that value as a base-2 exponent. Times are reported in
// microseconds.
// range(0) = log2 of the number of parallel_for launches per benchmark iteration.
BENCHMARK(parallel_for_field_element_addition)->Unit(kMicrosecond)->DenseRange(0, MAX_REPETITION_LOG);
// range(0) = log2 of the number of operations per benchmark iteration.
BENCHMARK(ff_addition)->Unit(kMicrosecond)->DenseRange(12, 30);
BENCHMARK(ff_multiplication)->Unit(kMicrosecond)->DenseRange(12, 27);
BENCHMARK(ff_sqr)->Unit(kMicrosecond)->DenseRange(12, 27);
BENCHMARK(projective_point_addition)->Unit(kMicrosecond)->DenseRange(12, 22);
BENCHMARK(projective_point_accidental_doubling)->Unit(kMicrosecond)->DenseRange(12, 22);
BENCHMARK(projective_point_doubling)->Unit(kMicrosecond)->DenseRange(12, 22);
// range(0) = log2 of the number of empty loop steps per benchmark iteration.
BENCHMARK(cycle_waste)->Unit(kMicrosecond)->DenseRange(20, 30);
// Expands to the main() function that runs the registered benchmarks.
BENCHMARK_MAIN();