Skip to content

Commit

Permalink
fix(merkle-tree): Repair stale keys for tree in background (#3200)
Browse files Browse the repository at this point in the history
## What ❔

Implements a background task to remove bogus stale keys for the Merkle
tree.

## Why ❔

These keys could have been produced during L1 batch reverts before
#3178.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Tests for the changes have been added / updated.
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev
lint`.
  • Loading branch information
slowli authored Nov 12, 2024
1 parent 8be0c65 commit 363b4f0
Show file tree
Hide file tree
Showing 21 changed files with 923 additions and 41 deletions.
9 changes: 9 additions & 0 deletions core/bin/external_node/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,9 @@ pub(crate) struct OptionalENConfig {
/// Timeout to wait for the Merkle tree database to run compaction on stalled writes.
#[serde(default = "OptionalENConfig::default_merkle_tree_stalled_writes_timeout_sec")]
merkle_tree_stalled_writes_timeout_sec: u64,
/// Enables the stale keys repair task for the Merkle tree.
#[serde(default)]
pub merkle_tree_repair_stale_keys: bool,

// Postgres config (new parameters)
/// Threshold in milliseconds for the DB connection lifetime to denote it as long-living and log its details.
Expand Down Expand Up @@ -639,6 +642,12 @@ impl OptionalENConfig {
merkle_tree.stalled_writes_timeout_sec,
default_merkle_tree_stalled_writes_timeout_sec
),
merkle_tree_repair_stale_keys: general_config
.db_config
.as_ref()
.map_or(false, |config| {
config.experimental.merkle_tree_repair_stale_keys
}),
database_long_connection_threshold_ms: load_config!(
general_config.postgres_config,
long_connection_threshold_ms
Expand Down
5 changes: 5 additions & 0 deletions core/bin/external_node/src/node_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,11 @@ impl ExternalNodeBuilder {
layer = layer.with_tree_api_config(merkle_tree_api_config);
}

// Add stale keys repair task if requested.
if self.config.optional.merkle_tree_repair_stale_keys {
layer = layer.with_stale_keys_repair();
}

// Add tree pruning if needed.
if self.config.optional.pruning_enabled {
layer = layer.with_pruning_config(self.config.optional.pruning_removal_delay());
Expand Down
4 changes: 4 additions & 0 deletions core/lib/config/src/configs/experimental.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub struct ExperimentalDBConfig {
/// correspondingly; otherwise, RocksDB performance can significantly degrade.
#[serde(default)]
pub include_indices_and_filters_in_block_cache: bool,
/// Enables the stale keys repair task for the Merkle tree.
#[serde(default)]
pub merkle_tree_repair_stale_keys: bool,
}

impl Default for ExperimentalDBConfig {
Expand All @@ -40,6 +43,7 @@ impl Default for ExperimentalDBConfig {
protective_reads_persistence_enabled: false,
processing_delay_ms: Self::default_merkle_tree_processing_delay_ms(),
include_indices_and_filters_in_block_cache: false,
merkle_tree_repair_stale_keys: false,
}
}
}
Expand Down
1 change: 1 addition & 0 deletions core/lib/config/src/testonly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ impl Distribution<configs::ExperimentalDBConfig> for EncodeDist {
protective_reads_persistence_enabled: self.sample(rng),
processing_delay_ms: self.sample(rng),
include_indices_and_filters_in_block_cache: self.sample(rng),
merkle_tree_repair_stale_keys: self.sample(rng),
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions core/lib/env_config/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ mod tests {
DATABASE_MERKLE_TREE_MAX_L1_BATCHES_PER_ITER=50
DATABASE_EXPERIMENTAL_STATE_KEEPER_DB_BLOCK_CACHE_CAPACITY_MB=64
DATABASE_EXPERIMENTAL_STATE_KEEPER_DB_MAX_OPEN_FILES=100
DATABASE_EXPERIMENTAL_MERKLE_TREE_REPAIR_STALE_KEYS=true
"#;
lock.set_env(config);

Expand All @@ -109,6 +110,7 @@ mod tests {
db_config.experimental.state_keeper_db_max_open_files,
NonZeroU32::new(100)
);
assert!(db_config.experimental.merkle_tree_repair_stale_keys);
}

#[test]
Expand All @@ -118,6 +120,7 @@ mod tests {
"DATABASE_STATE_KEEPER_DB_PATH",
"DATABASE_EXPERIMENTAL_STATE_KEEPER_DB_MAX_OPEN_FILES",
"DATABASE_EXPERIMENTAL_STATE_KEEPER_DB_BLOCK_CACHE_CAPACITY_MB",
"DATABASE_EXPERIMENTAL_MERKLE_TREE_REPAIR_STALE_KEYS",
"DATABASE_MERKLE_TREE_BACKUP_PATH",
"DATABASE_MERKLE_TREE_PATH",
"DATABASE_MERKLE_TREE_MODE",
Expand All @@ -144,6 +147,7 @@ mod tests {
128
);
assert_eq!(db_config.experimental.state_keeper_db_max_open_files, None);
assert!(!db_config.experimental.merkle_tree_repair_stale_keys);

// Check that new env variable for Merkle tree path is supported
lock.set_env("DATABASE_MERKLE_TREE_PATH=/db/tree/main");
Expand Down
5 changes: 5 additions & 0 deletions core/lib/merkle_tree/src/domain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,11 @@ impl ZkSyncTreeReader {
&self.0.db
}

/// Converts this reader to the underlying DB.
pub fn into_db(self) -> RocksDBWrapper {
self.0.db
}

/// Returns the root hash and leaf count at the specified L1 batch.
pub fn root_info(&self, l1_batch_number: L1BatchNumber) -> Option<(ValueHash, u64)> {
let root = self.0.root(l1_batch_number.0.into())?;
Expand Down
16 changes: 16 additions & 0 deletions core/lib/merkle_tree/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ mod hasher;
mod metrics;
mod pruning;
pub mod recovery;
pub mod repair;
mod storage;
mod types;
mod utils;
Expand Down Expand Up @@ -200,6 +201,21 @@ impl<DB: Database, H: HashTree> MerkleTree<DB, H> {
root.unwrap_or(Root::Empty)
}

/// Incorrect version of [`Self::truncate_recent_versions()`] that doesn't remove stale keys for the truncated tree versions.
#[cfg(test)]
fn truncate_recent_versions_incorrectly(
&mut self,
retained_version_count: u64,
) -> anyhow::Result<()> {
let mut manifest = self.db.manifest().unwrap_or_default();
if manifest.version_count > retained_version_count {
manifest.version_count = retained_version_count;
let patch = PatchSet::from_manifest(manifest);
self.db.apply_patch(patch)?;
}
Ok(())
}

/// Extends this tree by creating its new version.
///
/// # Return value
Expand Down
39 changes: 5 additions & 34 deletions core/lib/merkle_tree/src/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ mod tests {
use super::*;
use crate::{
types::{Node, NodeKey},
utils::testonly::setup_tree_with_stale_keys,
Database, Key, MerkleTree, PatchSet, RocksDBWrapper, TreeEntry, ValueHash,
};

Expand Down Expand Up @@ -507,47 +508,17 @@ mod tests {
test_keys_are_removed_by_pruning_when_overwritten_in_multiple_batches(true);
}

fn test_pruning_with_truncation(db: impl PruneDatabase) {
let mut tree = MerkleTree::new(db).unwrap();
let kvs: Vec<_> = (0_u64..100)
.map(|i| TreeEntry::new(Key::from(i), i + 1, ValueHash::zero()))
.collect();
tree.extend(kvs).unwrap();

let overridden_kvs = vec![TreeEntry::new(
Key::from(0),
1,
ValueHash::repeat_byte(0xaa),
)];
tree.extend(overridden_kvs).unwrap();

let stale_keys = tree.db.stale_keys(1);
assert!(
stale_keys.iter().any(|key| !key.is_empty()),
"{stale_keys:?}"
);

// Revert `overridden_kvs`.
tree.truncate_recent_versions(1).unwrap();
assert_eq!(tree.latest_version(), Some(0));
let future_stale_keys = tree.db.stale_keys(1);
assert!(future_stale_keys.is_empty());

// Add a new version without the key. To make the matter more egregious, the inserted key
// differs from all existing keys, starting from the first nibble.
let new_key = Key::from_big_endian(&[0xaa; 32]);
let new_kvs = vec![TreeEntry::new(new_key, 101, ValueHash::repeat_byte(0xaa))];
tree.extend(new_kvs).unwrap();
assert_eq!(tree.latest_version(), Some(1));
fn test_pruning_with_truncation(mut db: impl PruneDatabase) {
setup_tree_with_stale_keys(&mut db, false);

let stale_keys = tree.db.stale_keys(1);
let stale_keys = db.stale_keys(1);
assert_eq!(stale_keys.len(), 1);
assert!(
stale_keys[0].is_empty() && stale_keys[0].version == 0,
"{stale_keys:?}"
);

let (mut pruner, _) = MerkleTreePruner::new(tree.db);
let (mut pruner, _) = MerkleTreePruner::new(db);
let prunable_version = pruner.last_prunable_version().unwrap();
assert_eq!(prunable_version, 1);
let stats = pruner
Expand Down
Loading

0 comments on commit 363b4f0

Please sign in to comment.