diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp index 292cf11bbd7..11ea759d4f2 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.cpp @@ -60,10 +60,29 @@ void Polynomial::allocate_backing_memory(size_t size, size_t virtual_size, s * * @param size The size of the polynomial. */ -template Polynomial::Polynomial(size_t size, size_t virtual_size, size_t start_index) +template +Polynomial::Polynomial(size_t size, size_t virtual_size, size_t start_index, bool disable_parallelisation) { + PROFILE_THIS_NAME("polynomial allocation with zeroing"); + allocate_backing_memory(size, virtual_size, start_index); - memset(static_cast(coefficients_.backing_memory_.get()), 0, sizeof(Fr) * size); + if (disable_parallelisation) { + // In AVM polynomials are small and already constructed in parallel + memset(static_cast(coefficients_.backing_memory_.get()), 0, sizeof(Fr) * size); + return; + } + + size_t num_threads = calculate_num_threads(size); + size_t range_per_thread = size / num_threads; + size_t leftovers = size - (range_per_thread * num_threads); + + parallel_for(num_threads, [&](size_t j) { + size_t offset = j * range_per_thread; + size_t range = (j == num_threads - 1) ? 
range_per_thread + leftovers : range_per_thread; + ASSERT(offset < size || size == 0); + ASSERT((offset + range) <= size); + memset(static_cast(coefficients_.backing_memory_.get() + offset), 0, sizeof(Fr) * range); + }); } /** @@ -76,6 +95,7 @@ template Polynomial::Polynomial(size_t size, size_t virtual_si template Polynomial::Polynomial(size_t size, size_t virtual_size, size_t start_index, [[maybe_unused]] DontZeroMemory flag) { + PROFILE_THIS_NAME("polynomial allocation without zeroing"); allocate_backing_memory(size, virtual_size, start_index); } diff --git a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp index c51597d2276..17d6ab34c61 100644 --- a/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp +++ b/barretenberg/cpp/src/barretenberg/polynomials/polynomial.hpp @@ -65,13 +65,11 @@ template class Polynomial { using FF = Fr; enum class DontZeroMemory { FLAG }; - Polynomial(size_t size, size_t virtual_size, size_t start_index = 0); + Polynomial(size_t size, size_t virtual_size, size_t start_index = 0, bool disable_parallelisation = false); // Intended just for plonk, where size == virtual_size always Polynomial(size_t size) - : Polynomial(size, size) - { - PROFILE_THIS(); - } + : Polynomial(size, size){}; + // Constructor that does not initialize values, use with caution to save time. 
Polynomial(size_t size, size_t virtual_size, size_t start_index, DontZeroMemory flag); Polynomial(size_t size, size_t virtual_size, DontZeroMemory flag) diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp index cb9f954d193..05797f2708c 100644 --- a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp +++ b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp @@ -96,7 +96,7 @@ template class DeciderProvingKey_ { } { - PROFILE_THIS_NAME("constructing proving key"); + PROFILE_THIS_NAME("allocating proving key"); proving_key = ProvingKey(dyadic_circuit_size, circuit.public_inputs.size(), commitment_key); // If not using structured trace OR if using structured trace but overflow has occurred (overflow block in @@ -188,7 +188,7 @@ template class DeciderProvingKey_ { // Allocate the table polynomials if constexpr (IsUltraFlavor) { for (auto& poly : proving_key.polynomials.get_tables()) { - poly = typename Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset); + poly = Polynomial(max_tables_size, dyadic_circuit_size, table_offset); } } } @@ -196,19 +196,19 @@ template class DeciderProvingKey_ { PROFILE_THIS_NAME("allocating sigmas and ids"); for (auto& sigma : proving_key.polynomials.get_sigmas()) { - sigma = typename Flavor::Polynomial(proving_key.circuit_size); + sigma = Polynomial(proving_key.circuit_size); } for (auto& id : proving_key.polynomials.get_ids()) { - id = typename Flavor::Polynomial(proving_key.circuit_size); + id = Polynomial(proving_key.circuit_size); } } { ZoneScopedN("allocating lookup read counts and tags"); // Allocate the read counts and tags polynomials proving_key.polynomials.lookup_read_counts = - typename Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset); + Polynomial(max_tables_size, dyadic_circuit_size, table_offset); proving_key.polynomials.lookup_read_tags = - typename 
Flavor::Polynomial(max_tables_size, dyadic_circuit_size, table_offset); + Polynomial(max_tables_size, dyadic_circuit_size, table_offset); } { ZoneScopedN("allocating lookup and databus inverses"); diff --git a/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp b/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp index 6a3d9ae9f17..5f37b4e56b4 100644 --- a/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp +++ b/barretenberg/cpp/src/barretenberg/vm/avm/generated/circuit_builder.cpp @@ -51,49 +51,54 @@ AvmCircuitBuilder::ProverPolynomials AvmCircuitBuilder::compute_polynomials() co })); // catch-all with fully formed polynomials - AVM_TRACK_TIME( - "circuit_builder/init_polys_unshifted", ({ - auto unshifted = polys.get_unshifted(); + AVM_TRACK_TIME("circuit_builder/init_polys_unshifted", ({ + auto unshifted = polys.get_unshifted(); - // An array which stores for each column of the trace the smallest size of the - // truncated column containing all non-zero elements. - // It is used to allocate the polynomials without memory overhead for the tail of zeros. - std::array col_nonzero_size{}; + // An array which stores for each column of the trace the smallest size of the + // truncated column containing all non-zero elements. + // It is used to allocate the polynomials without memory overhead for the tail of zeros. + std::array col_nonzero_size{}; - // Computation of size of columns. - // Non-parallel version takes 0.5 second for a trace size of 200k rows. - // A parallel version might be considered in the future. - for (size_t i = 0; i < num_rows; i++) { - const auto row = rows[i].as_vector(); - for (size_t col = 0; col < Row::SIZE; col++) { - if (!row[col].is_zero()) { - col_nonzero_size[col] = i + 1; - } - } - } + // Computation of size of columns. + // Non-parallel version takes 0.5 second for a trace size of 200k rows. + // A parallel version might be considered in the future. 
+ for (size_t i = 0; i < num_rows; i++) { + const auto row = rows[i].as_vector(); + for (size_t col = 0; col < Row::SIZE; col++) { + if (!row[col].is_zero()) { + col_nonzero_size[col] = i + 1; + } + } + } - // Set of the labels for derived/inverse polynomials. - const auto derived_labels = polys.get_derived_labels(); - std::set derived_labels_set(derived_labels.begin(), derived_labels.end()); + // Set of the labels for derived/inverse polynomials. + const auto derived_labels = polys.get_derived_labels(); + std::set derived_labels_set(derived_labels.begin(), derived_labels.end()); - bb::parallel_for(num_unshifted, [&](size_t i) { - auto& poly = unshifted[i]; - const auto col_idx = polys_to_cols_unshifted_idx[i]; - size_t col_size = 0; + bb::parallel_for(num_unshifted, [&](size_t i) { + auto& poly = unshifted[i]; + const auto col_idx = polys_to_cols_unshifted_idx[i]; + size_t col_size = 0; - // We fully allocate the inverse polynomials. We leave this potential memory optimization for later. - if (derived_labels_set.contains(labels[i])) { - col_size = num_rows; - } else { - col_size = col_nonzero_size[col_idx]; - } + // We fully allocate the inverse polynomials. We leave this potential memory optimization for + // later. 
+ if (derived_labels_set.contains(labels[i])) { + col_size = num_rows; + } else { + col_size = col_nonzero_size[col_idx]; + } - if (poly.is_empty()) { - // Not set above - poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size }; - } - }); - })); + if (poly.is_empty()) { + // Not set above + poly = Polynomial{ /*memory size*/ + col_size, + /*largest possible index as virtual size*/ circuit_subgroup_size, + /*start_index=*/0, + /*disable parallel initialisation=*/true + }; + } + }); + })); AVM_TRACK_TIME( "circuit_builder/set_polys_unshifted", ({ diff --git a/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs b/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs index 5d79f5e0389..537361df77e 100644 --- a/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs +++ b/bb-pilcom/bb-pil-backend/templates/circuit_builder.cpp.hbs @@ -89,8 +89,11 @@ namespace bb { if (poly.is_empty()) { // Not set above - poly = Polynomial{ /*memory size*/ col_size, /*largest possible index*/ circuit_subgroup_size }; - } + poly = Polynomial{ /*memory size*/ col_size, + /*largest possible index as virtual size*/ circuit_subgroup_size, + /*start_index=*/0, + /*disable parallel initialization=*/true + }; } }); }));