Skip to content

Commit

Permalink
chore: Remove handling of duplicates from the note hash tree (#10016)
Browse files Browse the repository at this point in the history
This PR removes the abilitity to handle duplicates in the note hash
tree. Since #9492, all trees now contain unique leaves only. Removing
the handling of duplicates reduces the complexity and storage
requirements of the trees.

---------

Co-authored-by: ludamad <[email protected]>
  • Loading branch information
PhilWindle and ludamad authored Nov 25, 2024
1 parent 2add6ae commit ece1d45
Show file tree
Hide file tree
Showing 15 changed files with 196 additions and 817 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,7 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, errors_are_caught_and_handle
std::string name = random_string();
std::string directory = random_temp_directory();
std::filesystem::create_directories(directory);
auto& random_engine = numeric::get_randomness();

{
LMDBTreeStore::SharedPtr db = std::make_shared<LMDBTreeStore>(_directory, name, 50, _maxReaders);
Expand All @@ -492,9 +493,10 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, errors_are_caught_and_handle

// Add lots of values to the tree
uint32_t num_values_to_add = 16 * 1024;
std::vector<fr> values(num_values_to_add, VALUES[0]);
std::vector<fr> values;
for (uint32_t i = 0; i < num_values_to_add; i++) {
memdb.update_element(i, VALUES[0]);
values.emplace_back(random_engine.get_random_uint256());
memdb.update_element(i, values[i]);
}
add_values(tree, values);

Expand Down Expand Up @@ -714,46 +716,31 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, test_find_leaf_index)

commit_tree(tree);

values = { 16, 4, 18, 22 };
values = { 16, 4, 19, 22 };
add_values(tree, values);

// we now have duplicate leaf 18, one committed the other not
check_find_leaf_index(tree, 18, 5, true, true);
check_find_leaf_index(tree, 18, 5, true, false);

// verify the find index from api
check_find_leaf_index_from(tree, 18, 0, 5, true, true);
check_find_leaf_index_from(tree, 18, 6, 10, true, true);
check_find_leaf_index_from(tree, 18, 6, 0, false, false);
check_find_leaf_index_from(tree, 19, 6, 10, true, true);
check_find_leaf_index_from(tree, 19, 0, 0, false, false);

commit_tree(tree);

// add another leaf 18
add_value(tree, 18);

// should return the first index
check_find_leaf_index_from(tree, 18, 0, 5, true, false);
check_find_leaf_index_from(tree, 18, 0, 5, true, true);

add_value(tree, 88);
// and another uncommitted 18
add_value(tree, 18);

add_value(tree, 32);

// should return the first uncommitted
check_find_leaf_index_from(tree, 18, 12, 12, true, true);
check_find_leaf_index_from(tree, 18, 14, 14, true, true);
check_find_leaf_index_from(tree, 18, 15, 0, false, true);
check_size(tree, 14);
check_size(tree, 12, false);

// look past the last instance of this leaf
check_find_leaf_index_from(tree, 18, 17, 0, false, true);
check_find_leaf_index_from(tree, 18, 6, 0, false, true);

// look beyond the end of uncommitted
check_find_leaf_index_from(tree, 18, 18, 0, false, true);
check_find_leaf_index_from(tree, 18, 15, 0, false, true);

// look beyond the end of committed and don't include uncomitted
check_find_leaf_index_from(tree, 18, 14, 0, false, false);
check_find_leaf_index_from(tree, 88, 13, 0, false, false);
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_add_multiple_values)
Expand Down Expand Up @@ -975,7 +962,7 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, test_find_historic_leaf_inde

commit_tree(tree);

values = { 16, 4, 18, 22 };
values = { 16, 4, 19, 22 };
add_values(tree, values);

// should not be present at block 1
Expand All @@ -987,15 +974,15 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, test_find_historic_leaf_inde
check_find_historic_leaf_index_from(tree, 1, 18, 2, 0, false, false);
// at block 2 it should be
check_find_historic_leaf_index_from(tree, 2, 18, 2, 5, true);
// at block 2, from index 6 it should not be found if looking only at committed
check_find_historic_leaf_index_from(tree, 2, 18, 6, 5, false, false);
// at block 2, from index 6 it should be found if looking at uncommitted too
check_find_historic_leaf_index_from(tree, 2, 18, 6, 10, true);
// at block 2, from index 6, 19 should not be found if looking only at committed
check_find_historic_leaf_index_from(tree, 2, 19, 6, 5, false, false);
// at block 2, from index 6, 19 should be found if looking at uncommitted too
check_find_historic_leaf_index_from(tree, 2, 19, 6, 10, true);

commit_tree(tree);

// at block 3, from index 6 it should now be found in committed only
check_find_historic_leaf_index_from(tree, 3, 18, 6, 10, true, false);
// at block 3, from index 6, should now be found in committed only
check_find_historic_leaf_index_from(tree, 3, 19, 6, 10, true, false);
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_be_filled)
Expand Down Expand Up @@ -1418,79 +1405,6 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_unwind_all_blocks)
test_unwind(_directory, "DB", _mapSize, _maxReaders, 10, 16, 16, 16, second);
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_unwind_blocks_with_duplicate_leaves)
{
constexpr size_t depth = 4;
std::string name = random_string();
LMDBTreeStore::SharedPtr db = std::make_shared<LMDBTreeStore>(_directory, name, _mapSize, _maxReaders);
std::unique_ptr<Store> store = std::make_unique<Store>(name, depth, db);
ThreadPoolPtr pool = make_thread_pool(1);
TreeType tree(std::move(store), pool);
MemoryTree<Poseidon2HashPolicy> memdb(depth);

constexpr size_t blockSize = 2;
constexpr size_t numBlocks = 2;
constexpr size_t numBlocksToUnwind = 1;

std::vector<fr> values = create_values(blockSize);

// Add the same batch of values many times
for (size_t i = 0; i < numBlocks; i++) {
for (size_t j = 0; j < values.size(); j++) {
size_t ind = i * blockSize + j;
memdb.update_element(ind, values[j]);
}
add_values(tree, values);
commit_tree(tree);
check_block_and_root_data(db, i + 1, memdb.root(), true);

for (size_t j = 0; j < values.size(); j++) {
size_t ind = i * blockSize + j;
// query the indices db directly
check_indices_data(db, values[j], ind, true, true);
}
}

for (size_t i = 0; i < numBlocks; i++) {
index_t startIndex = i * blockSize;
index_t expectedIndex = startIndex + 1;

// search for the leaf from start of each batch
check_find_leaf_index_from(tree, values[1], startIndex, expectedIndex, true);
// search for the leaf from start of the next batch
check_find_leaf_index_from(tree, values[1], startIndex + 2, expectedIndex + blockSize, i < (numBlocks - 1));
}

const uint32_t blocksToRemove = numBlocksToUnwind;
for (uint32_t i = 0; i < blocksToRemove; i++) {
const index_t blockNumber = numBlocks - i;
unwind_block(tree, blockNumber);

const index_t previousValidBlock = blockNumber - 1;
index_t deletedBlockStartIndex = previousValidBlock * blockSize;

check_block_height(tree, previousValidBlock);
check_size(tree, deletedBlockStartIndex);

for (size_t j = 0; j < numBlocks; j++) {
index_t startIndex = j * blockSize;
index_t expectedIndex = startIndex + 1;

// search for the leaf from start of each batch
check_find_leaf_index_from(tree, values[1], startIndex, expectedIndex, j < previousValidBlock);
// search for the leaf from start of the next batch
check_find_leaf_index_from(
tree, values[1], startIndex + 2, expectedIndex + blockSize, j < (previousValidBlock - 1));

for (size_t k = 0; k < values.size(); k++) {
size_t ind = j * blockSize + k;
// query the indices db directly. If block number == 1 that means the entry should not be present
check_indices_data(db, values[k], ind, blockNumber > 1, ind < deletedBlockStartIndex);
}
}
}
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_sync_and_unwind_large_blocks)
{

Expand Down Expand Up @@ -1534,23 +1448,15 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_advance_finalised_blocks

index_t expectedFinalisedBlock = i < finalisedBlockDelay ? 0 : i - finalisedBlockDelay;
check_finalised_block_height(tree, expectedFinalisedBlock);
index_t expectedPresentStart = i < finalisedBlockDelay ? 0 : (expectedFinalisedBlock * blockSize);
index_t expectedPresentEnd = ((i + 1) * blockSize) - 1;
std::vector<fr> toTest(values.begin() + static_cast<int64_t>(expectedPresentStart),
values.begin() + static_cast<int64_t>(expectedPresentEnd + 1));
check_leaf_keys_are_present(db, expectedPresentStart, expectedPresentEnd, toTest);

if (i >= finalisedBlockDelay) {

index_t blockToFinalise = expectedFinalisedBlock + 1;

// attemnpting to finalise a block that doesn't exist should fail
// attempting to finalise a block that doesn't exist should fail
finalise_block(tree, blockToFinalise + numBlocks, false);

finalise_block(tree, blockToFinalise, true);

index_t expectedNotPresentEnd = (blockToFinalise * blockSize) - 1;
check_leaf_keys_are_not_present(db, 0, expectedNotPresentEnd);
}
}
}
Expand Down Expand Up @@ -1585,12 +1491,7 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_finalise_multiple_blocks

index_t blockToFinalise = 8;

check_leaf_keys_are_present(db, 0, (numBlocks * blockSize) - 1, values);

finalise_block(tree, blockToFinalise);

index_t expectedNotPresentEnd = (blockToFinalise * blockSize) - 1;
check_leaf_keys_are_not_present(db, 0, expectedNotPresentEnd);
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_not_finalise_block_beyond_pending_chain)
Expand Down Expand Up @@ -1630,12 +1531,7 @@ TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_not_finalise_block_beyon
// finalise the entire chain
index_t blockToFinalise = numBlocks;

check_leaf_keys_are_present(db, 0, (numBlocks * blockSize) - 1, values);

finalise_block(tree, blockToFinalise);

index_t expectedNotPresentEnd = (blockToFinalise * blockSize) - 1;
check_leaf_keys_are_not_present(db, 0, expectedNotPresentEnd);
}

TEST_F(PersistedContentAddressedAppendOnlyTreeTest, can_not_fork_from_unwound_blocks)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,9 @@ bool LMDBTransaction::get_value(std::vector<uint8_t>& key, std::vector<uint8_t>&
{
return lmdb_queries::get_value(key, data, db, *this);
}

bool LMDBTransaction::get_value(std::vector<uint8_t>& key, index_t& data, const LMDBDatabase& db) const
{
return lmdb_queries::get_value(key, data, db, *this);
}
} // namespace bb::crypto::merkle_tree
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "barretenberg/crypto/merkle_tree/lmdb_store/lmdb_database.hpp"
#include "barretenberg/crypto/merkle_tree/lmdb_store/lmdb_environment.hpp"
#include "barretenberg/crypto/merkle_tree/lmdb_store/queries.hpp"
#include "lmdb.h"
#include <functional>
#include <vector>

Expand Down Expand Up @@ -37,16 +38,18 @@ class LMDBTransaction {
*/
virtual void abort();

template <typename T>
template <typename T, typename K>
bool get_value_or_previous(T& key,
std::vector<uint8_t>& data,
K& data,
const LMDBDatabase& db,
const std::function<bool(const std::vector<uint8_t>&)>& is_valid) const;
const std::function<bool(const MDB_val&)>& is_valid) const;

template <typename T> bool get_value_or_previous(T& key, std::vector<uint8_t>& data, const LMDBDatabase& db) const;
template <typename T, typename K> bool get_value_or_previous(T& key, K& data, const LMDBDatabase& db) const;

template <typename T> bool get_value(T& key, std::vector<uint8_t>& data, const LMDBDatabase& db) const;

template <typename T> bool get_value(T& key, index_t& data, const LMDBDatabase& db) const;

template <typename T>
void get_all_values_greater_or_equal_key(const T& key,
std::vector<std::vector<uint8_t>>& data,
Expand All @@ -59,6 +62,8 @@ class LMDBTransaction {

bool get_value(std::vector<uint8_t>& key, std::vector<uint8_t>& data, const LMDBDatabase& db) const;

bool get_value(std::vector<uint8_t>& key, index_t& data, const LMDBDatabase& db) const;

protected:
std::shared_ptr<LMDBEnvironment> _environment;
MDB_txn* _transaction;
Expand All @@ -71,17 +76,23 @@ template <typename T> bool LMDBTransaction::get_value(T& key, std::vector<uint8_
return get_value(keyBuffer, data, db);
}

template <typename T>
bool LMDBTransaction::get_value_or_previous(T& key, std::vector<uint8_t>& data, const LMDBDatabase& db) const
template <typename T> bool LMDBTransaction::get_value(T& key, index_t& data, const LMDBDatabase& db) const
{
std::vector<uint8_t> keyBuffer = serialise_key(key);
return get_value(keyBuffer, data, db);
}

template <typename T, typename K>
bool LMDBTransaction::get_value_or_previous(T& key, K& data, const LMDBDatabase& db) const
{
return lmdb_queries::get_value_or_previous(key, data, db, *this);
}

template <typename T>
template <typename T, typename K>
bool LMDBTransaction::get_value_or_previous(T& key,
std::vector<uint8_t>& data,
K& data,
const LMDBDatabase& db,
const std::function<bool(const std::vector<uint8_t>&)>& is_valid) const
const std::function<bool(const MDB_val&)>& is_valid) const
{
return lmdb_queries::get_value_or_previous(key, data, db, is_valid, *this);
}
Expand Down
Loading

1 comment on commit ece1d45

@AztecBot
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'C++ Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.05.

Benchmark suite Current: ece1d45 Previous: dc528da Ratio
wasmconstruct_proof_ultrahonk_power_of_2/20 16486.247683999998 ms/iter 15226.159180999999 ms/iter 1.08

This comment was automatically generated by workflow using github-action-benchmark.

CC: @ludamad @codygunton

Please sign in to comment.