diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index d860d79e06ac..0e61d7b5a99d 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -230,9 +230,10 @@ jobs: fi ENABLE_CONSENSUS=${{ matrix.consensus }} \ DEPLOYMENT_MODE=${{ matrix.deployment_mode }} \ + SNAPSHOTS_CREATOR_VERSION=${{ matrix.deployment_mode == 'Validium' && '0' || '1' }} \ DISABLE_TREE_DURING_PRUNING=${{ matrix.base_token == 'Eth' }} \ ETH_CLIENT_WEB3_URL="http://reth:8545" \ - PASSED_ENV_VARS="ENABLE_CONSENSUS,DEPLOYMENT_MODE,DISABLE_TREE_DURING_PRUNING,ETH_CLIENT_WEB3_URL" \ + PASSED_ENV_VARS="ENABLE_CONSENSUS,DEPLOYMENT_MODE,DISABLE_TREE_DURING_PRUNING,SNAPSHOTS_CREATOR_VERSION,ETH_CLIENT_WEB3_URL" \ ci_run yarn recovery-test snapshot-recovery-test - name: Genesis recovery test diff --git a/core/bin/external_node/src/config/mod.rs b/core/bin/external_node/src/config/mod.rs index b5b041a1fc6e..9c4e9657084f 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -422,7 +422,7 @@ pub(crate) struct OptionalENConfig { pub snapshots_recovery_postgres_max_concurrency: NonZeroUsize, #[serde(default)] - pub snapshot_recover_object_store: Option, + pub snapshots_recovery_object_store: Option, /// Enables pruning of the historical node state (Postgres and Merkle tree). The node will retain /// recent state and will continuously remove (prune) old enough parts of the state in the background. @@ -622,7 +622,7 @@ impl OptionalENConfig { .as_ref() .map(|a| a.enabled) .unwrap_or_default(), - snapshot_recover_object_store: load_config!( + snapshots_recovery_object_store: load_config!( general_config.snapshot_recovery, object_store ), @@ -808,7 +808,7 @@ impl OptionalENConfig { let mut result: OptionalENConfig = envy::prefixed("EN_") .from_env() .context("could not load external node config")?; - result.snapshot_recover_object_store = snapshot_recovery_object_store_config().ok(); + result.snapshots_recovery_object_store = snapshot_recovery_object_store_config().ok(); Ok(result) } @@ -1041,6 +1041,10 @@ pub(crate) struct ExperimentalENConfig { // Snapshot recovery /// L1 batch number of the snapshot to use during recovery. Specifying this parameter is mostly useful for testing. pub snapshots_recovery_l1_batch: Option, + /// Enables dropping storage key preimages when recovering storage logs from a snapshot with version 0. + /// This is a temporary flag that will eventually be removed together with version 0 snapshot support. + #[serde(default)] + pub snapshots_recovery_drop_storage_key_preimages: bool, /// Approximate chunk size (measured in the number of entries) to recover in a single iteration. /// Reasonable values are order of 100,000 (meaning an iteration takes several seconds). 
/// @@ -1077,6 +1081,7 @@ impl ExperimentalENConfig { Self::default_state_keeper_db_block_cache_capacity_mb(), state_keeper_db_max_open_files: None, snapshots_recovery_l1_batch: None, + snapshots_recovery_drop_storage_key_preimages: false, snapshots_recovery_tree_chunk_size: Self::default_snapshots_recovery_tree_chunk_size(), snapshots_recovery_tree_parallel_persistence_buffer: None, commitment_generator_max_parallelism: None, @@ -1095,7 +1100,6 @@ impl ExperimentalENConfig { experimental.state_keeper_db_block_cache_capacity_mb, default_state_keeper_db_block_cache_capacity_mb ), - state_keeper_db_max_open_files: load_config!( general_config.db_config, experimental.state_keeper_db_max_open_files @@ -1110,6 +1114,10 @@ impl ExperimentalENConfig { general_config.snapshot_recovery, tree.parallel_persistence_buffer ), + snapshots_recovery_drop_storage_key_preimages: general_config + .snapshot_recovery + .as_ref() + .map_or(false, |config| config.drop_storage_key_preimages), commitment_generator_max_parallelism: general_config .commitment_generator .as_ref() diff --git a/core/bin/external_node/src/init.rs b/core/bin/external_node/src/init.rs index ddf83a1f5581..28f9aa2c422b 100644 --- a/core/bin/external_node/src/init.rs +++ b/core/bin/external_node/src/init.rs @@ -17,6 +17,7 @@ use zksync_web3_decl::client::{DynClient, L2}; pub(crate) struct SnapshotRecoveryConfig { /// If not specified, the latest snapshot will be used. pub snapshot_l1_batch_override: Option, + pub drop_storage_key_preimages: bool, pub object_store_config: Option, } @@ -111,6 +112,10 @@ pub(crate) async fn ensure_storage_initialized( ); snapshots_applier_task.set_snapshot_l1_batch(snapshot_l1_batch); } + if recovery_config.drop_storage_key_preimages { + tracing::info!("Dropping storage key preimages for snapshot storage logs"); + snapshots_applier_task.drop_storage_key_preimages(); + } app_health.insert_component(snapshots_applier_task.health_check())?; let recovery_started_at = Instant::now(); diff --git a/core/bin/external_node/src/main.rs b/core/bin/external_node/src/main.rs index 0b3854b03c05..bb19b5670aac 100644 --- a/core/bin/external_node/src/main.rs +++ b/core/bin/external_node/src/main.rs @@ -971,7 +971,10 @@ async fn run_node( .snapshots_recovery_enabled .then_some(SnapshotRecoveryConfig { snapshot_l1_batch_override: config.experimental.snapshots_recovery_l1_batch, - object_store_config: config.optional.snapshot_recover_object_store.clone(), + drop_storage_key_preimages: config + .experimental + .snapshots_recovery_drop_storage_key_preimages, + object_store_config: config.optional.snapshots_recovery_object_store.clone(), }); ensure_storage_initialized( connection_pool.clone(), diff --git a/core/bin/snapshots_creator/README.md b/core/bin/snapshots_creator/README.md index 5d9b599599c1..26ebbb6d652a 100644 --- a/core/bin/snapshots_creator/README.md +++ b/core/bin/snapshots_creator/README.md @@ -51,9 +51,9 @@ Creating a snapshot is a part of the [snapshot recovery integration test]. You c Each snapshot consists of three types of data (see [`snapshots.rs`] for exact definitions): -- **Header:** Includes basic information, such as the miniblock / L1 batch of the snapshot, miniblock / L1 batch - timestamps, miniblock hash and L1 batch root hash. Returned by the methods in the `snapshots` namespace of the - JSON-RPC API of the main node. +- **Header:** Includes basic information, such as the L2 block / L1 batch of the snapshot, L2 block / L1 batch + timestamps, L2 block hash and L1 batch root hash. 
Returned by the methods in the `snapshots` namespace of the JSON-RPC + API of the main node. - **Storage log chunks:** Latest values for all VM storage slots ever written to at the time the snapshot is made. Besides key–value pairs, each storage log record also contains the L1 batch number of its initial write and its enumeration index; both are used to restore the contents of the `initial_writes` table. Chunking storage logs is @@ -64,6 +64,16 @@ Each snapshot consists of three types of data (see [`snapshots.rs`] for exact de - **Factory dependencies:** All bytecodes deployed on L2 at the time the snapshot is made. Stored as a single gzipped Protobuf message in an object store. +### Versioning + +There are currently 2 versions of the snapshot format which differ in how keys are mentioned in storage logs. + +- Version 0 includes key preimages (EVM-compatible keys), i.e. address / contract slot tuples. +- Version 1 includes only hashed keys as used in Era ZKP circuits and in the Merkle tree. Besides reducing the snapshot + size (with the change, keys occupy 32 bytes instead of 52), this allows to unify snapshot recovery with recovery from + L1 data. Having only hashed keys for snapshot storage logs is safe; key preimages are only required for a couple of + components to sort keys in a batch, but these cases only require preimages for L1 batches locally executed on a node. + [`snapshots.rs`]: ../../lib/types/src/snapshots.rs [object store]: ../../lib/object_store [snapshot recovery integration test]: ../../tests/recovery-test/tests/snapshot-recovery.test.ts diff --git a/core/bin/snapshots_creator/src/creator.rs b/core/bin/snapshots_creator/src/creator.rs index 597f6168b93a..18212a7d2055 100644 --- a/core/bin/snapshots_creator/src/creator.rs +++ b/core/bin/snapshots_creator/src/creator.rs @@ -1,16 +1,17 @@ //! [`SnapshotCreator`] and tightly related types. -use std::sync::Arc; +use std::{sync::Arc, time::Duration}; use anyhow::Context as _; use tokio::sync::Semaphore; use zksync_config::SnapshotsCreatorConfig; use zksync_dal::{Connection, ConnectionPool, Core, CoreDal, DalResult}; -use zksync_object_store::ObjectStore; +use zksync_object_store::{ObjectStore, StoredObject}; use zksync_types::{ snapshots::{ uniform_hashed_keys_chunk, SnapshotFactoryDependencies, SnapshotFactoryDependency, - SnapshotMetadata, SnapshotStorageLogsChunk, SnapshotStorageLogsStorageKey, SnapshotVersion, + SnapshotMetadata, SnapshotStorageLog, SnapshotStorageLogsChunk, + SnapshotStorageLogsStorageKey, SnapshotVersion, }, L1BatchNumber, L2BlockNumber, }; @@ -22,6 +23,7 @@ use crate::tests::HandleEvent; /// Encapsulates progress of creating a particular storage snapshot. #[derive(Debug)] struct SnapshotProgress { + version: SnapshotVersion, l1_batch_number: L1BatchNumber, /// `true` if the snapshot is new (i.e., its progress is not recovered from Postgres). 
is_new_snapshot: bool, @@ -30,8 +32,9 @@ struct SnapshotProgress { } impl SnapshotProgress { - fn new(l1_batch_number: L1BatchNumber, chunk_count: u64) -> Self { + fn new(version: SnapshotVersion, l1_batch_number: L1BatchNumber, chunk_count: u64) -> Self { Self { + version, l1_batch_number, is_new_snapshot: true, chunk_count, @@ -48,6 +51,7 @@ impl SnapshotProgress { .collect(); Self { + version: snapshot.version, l1_batch_number: snapshot.l1_batch_number, is_new_snapshot: false, chunk_count: snapshot.storage_logs_filepaths.len() as u64, @@ -76,11 +80,13 @@ impl SnapshotCreator { async fn process_storage_logs_single_chunk( &self, semaphore: &Semaphore, + progress: &SnapshotProgress, l2_block_number: L2BlockNumber, - l1_batch_number: L1BatchNumber, chunk_id: u64, - chunk_count: u64, ) -> anyhow::Result<()> { + let chunk_count = progress.chunk_count; + let l1_batch_number = progress.l1_batch_number; + let _permit = semaphore.acquire().await?; #[cfg(test)] if self.event_listener.on_chunk_started().should_exit() { @@ -92,35 +98,45 @@ impl SnapshotCreator { let latency = METRICS.storage_logs_processing_duration[&StorageChunkStage::LoadFromPostgres].start(); - let logs = conn - .snapshots_creator_dal() - .get_storage_logs_chunk(l2_block_number, l1_batch_number, hashed_keys_range) - .await - .context("Error fetching storage logs count")?; - drop(conn); - let latency = latency.observe(); - tracing::info!( - "Loaded chunk {chunk_id} ({} logs) from Postgres in {latency:?}", - logs.len() - ); - - let latency = - METRICS.storage_logs_processing_duration[&StorageChunkStage::SaveToGcs].start(); - let storage_logs_chunk = SnapshotStorageLogsChunk { storage_logs: logs }; - let key = SnapshotStorageLogsStorageKey { - l1_batch_number, - chunk_id, + let (output_filepath, latency) = match progress.version { + SnapshotVersion::Version0 => { + #[allow(deprecated)] // support of version 0 snapshots will be removed eventually + let logs = conn + .snapshots_creator_dal() + .get_storage_logs_chunk_with_key_preimages( + l2_block_number, + l1_batch_number, + hashed_keys_range, + ) + .await + .context("error fetching storage logs")?; + drop(conn); + + let latency = latency.observe(); + tracing::info!( + "Loaded chunk {chunk_id} ({} logs) from Postgres in {latency:?}", + logs.len() + ); + self.store_storage_logs_chunk(l1_batch_number, chunk_id, logs) + .await? + } + SnapshotVersion::Version1 => { + let logs = conn + .snapshots_creator_dal() + .get_storage_logs_chunk(l2_block_number, l1_batch_number, hashed_keys_range) + .await + .context("error fetching storage logs")?; + drop(conn); + + let latency = latency.observe(); + tracing::info!( + "Loaded chunk {chunk_id} ({} logs) from Postgres in {latency:?}", + logs.len() + ); + self.store_storage_logs_chunk(l1_batch_number, chunk_id, logs) + .await? 
+ } }; - let filename = self - .blob_store - .put(key, &storage_logs_chunk) - .await - .context("Error storing storage logs chunk in blob store")?; - let output_filepath_prefix = self - .blob_store - .get_storage_prefix::(); - let output_filepath = format!("{output_filepath_prefix}/{filename}"); - let latency = latency.observe(); let mut master_conn = self .master_pool @@ -141,6 +157,35 @@ impl SnapshotCreator { Ok(()) } + async fn store_storage_logs_chunk( + &self, + l1_batch_number: L1BatchNumber, + chunk_id: u64, + logs: Vec>, + ) -> anyhow::Result<(String, Duration)> + where + for<'a> SnapshotStorageLogsChunk: StoredObject = SnapshotStorageLogsStorageKey>, + { + let latency = + METRICS.storage_logs_processing_duration[&StorageChunkStage::SaveToGcs].start(); + let storage_logs_chunk = SnapshotStorageLogsChunk { storage_logs: logs }; + let key = SnapshotStorageLogsStorageKey { + l1_batch_number, + chunk_id, + }; + let filename = self + .blob_store + .put(key, &storage_logs_chunk) + .await + .context("Error storing storage logs chunk in blob store")?; + let output_filepath_prefix = self + .blob_store + .get_storage_prefix::>(); + let output_filepath = format!("{output_filepath_prefix}/{filename}"); + let latency = latency.observe(); + Ok((output_filepath, latency)) + } + async fn process_factory_deps( &self, l2_block_number: L2BlockNumber, @@ -190,18 +235,12 @@ impl SnapshotCreator { /// Returns `Ok(None)` if the created snapshot would coincide with `latest_snapshot`. async fn initialize_snapshot_progress( config: &SnapshotsCreatorConfig, + l1_batch_number: L1BatchNumber, min_chunk_count: u64, - latest_snapshot: Option<&SnapshotMetadata>, conn: &mut Connection<'_, Core>, ) -> anyhow::Result> { - // We subtract 1 so that after restore, EN node has at least one L1 batch to fetch. - let sealed_l1_batch_number = conn.blocks_dal().get_sealed_l1_batch_number().await?; - let sealed_l1_batch_number = sealed_l1_batch_number.context("No L1 batches in Postgres")?; - anyhow::ensure!( - sealed_l1_batch_number != L1BatchNumber(0), - "Cannot create snapshot when only the genesis L1 batch is present in Postgres" - ); - let l1_batch_number = sealed_l1_batch_number - 1; + let snapshot_version = SnapshotVersion::try_from(config.version) + .context("invalid snapshot version specified in config")?; // Sanity check: the selected L1 batch should have Merkle tree data; otherwise, it could be impossible // to recover from the generated snapshot. @@ -215,15 +254,6 @@ impl SnapshotCreator { ) })?; - let latest_snapshot_l1_batch_number = - latest_snapshot.map(|snapshot| snapshot.l1_batch_number); - if latest_snapshot_l1_batch_number == Some(l1_batch_number) { - tracing::info!( - "Snapshot at expected L1 batch #{l1_batch_number} is already created; exiting" - ); - return Ok(None); - } - let distinct_storage_logs_keys_count = conn .snapshots_creator_dal() .get_distinct_storage_logs_keys_count(l1_batch_number) @@ -238,7 +268,11 @@ impl SnapshotCreator { "Selected storage logs chunking for L1 batch {l1_batch_number}: \ {chunk_count} chunks of expected size {chunk_size}" ); - Ok(Some(SnapshotProgress::new(l1_batch_number, chunk_count))) + Ok(Some(SnapshotProgress::new( + snapshot_version, + l1_batch_number, + chunk_count, + ))) } /// Returns `Ok(None)` if a snapshot should not be created / resumed. 
@@ -251,25 +285,59 @@ impl SnapshotCreator { .master_pool .connection_tagged("snapshots_creator") .await?; - let latest_snapshot = master_conn + + let sealed_l1_batch_number = master_conn + .blocks_dal() + .get_sealed_l1_batch_number() + .await?; + let sealed_l1_batch_number = sealed_l1_batch_number.context("No L1 batches in Postgres")?; + let requested_l1_batch_number = if let Some(l1_batch_number) = config.l1_batch_number { + anyhow::ensure!( + l1_batch_number <= sealed_l1_batch_number, + "Requested a snapshot for L1 batch #{l1_batch_number} that doesn't exist in Postgres (latest L1 batch: {sealed_l1_batch_number})" + ); + l1_batch_number + } else { + // We subtract 1 so that after restore, EN node has at least one L1 batch to fetch. + anyhow::ensure!( + sealed_l1_batch_number != L1BatchNumber(0), + "Cannot create snapshot when only the genesis L1 batch is present in Postgres" + ); + sealed_l1_batch_number - 1 + }; + + let existing_snapshot = master_conn .snapshots_dal() - .get_newest_snapshot_metadata() + .get_snapshot_metadata(requested_l1_batch_number) .await?; drop(master_conn); - let pending_snapshot = latest_snapshot - .as_ref() - .filter(|snapshot| !snapshot.is_complete()); - if let Some(snapshot) = pending_snapshot { - Ok(Some(SnapshotProgress::from_existing_snapshot(snapshot))) - } else { - Self::initialize_snapshot_progress( - config, - min_chunk_count, - latest_snapshot.as_ref(), - &mut self.connect_to_replica().await?, - ) - .await + match existing_snapshot { + Some(snapshot) if snapshot.is_complete() => { + tracing::info!("Snapshot for the requested L1 batch is complete: {snapshot:?}"); + Ok(None) + } + Some(snapshot) if config.l1_batch_number.is_some() => { + Ok(Some(SnapshotProgress::from_existing_snapshot(&snapshot))) + } + Some(snapshot) => { + // Unless creating a snapshot for a specific L1 batch is requested, we never continue an existing snapshot, even if it's incomplete. + // This it to make running multiple snapshot creator instances in parallel easier to reason about. + tracing::warn!( + "Snapshot at expected L1 batch #{requested_l1_batch_number} exists, but is incomplete: {snapshot:?}. 
If you need to resume creating it, \ + specify the L1 batch number in the snapshot creator config" + ); + Ok(None) + } + None => { + Self::initialize_snapshot_progress( + config, + requested_l1_batch_number, + min_chunk_count, + &mut self.connect_to_replica().await?, + ) + .await + } } } @@ -319,7 +387,7 @@ impl SnapshotCreator { master_conn .snapshots_dal() .add_snapshot( - SnapshotVersion::Version0, + progress.version, progress.l1_batch_number, progress.chunk_count, &factory_deps_output_file, @@ -331,15 +399,18 @@ impl SnapshotCreator { .storage_logs_chunks_left_to_process .set(progress.remaining_chunk_ids.len()); let semaphore = Semaphore::new(config.concurrent_queries_count as usize); - let tasks = progress.remaining_chunk_ids.into_iter().map(|chunk_id| { - self.process_storage_logs_single_chunk( - &semaphore, - last_l2_block_number_in_batch, - progress.l1_batch_number, - chunk_id, - progress.chunk_count, - ) - }); + let tasks = progress + .remaining_chunk_ids + .iter() + .copied() + .map(|chunk_id| { + self.process_storage_logs_single_chunk( + &semaphore, + &progress, + last_l2_block_number_in_batch, + chunk_id, + ) + }); futures::future::try_join_all(tasks).await?; METRICS diff --git a/core/bin/snapshots_creator/src/tests.rs b/core/bin/snapshots_creator/src/tests.rs index 4fd553d0348d..1c26f1081598 100644 --- a/core/bin/snapshots_creator/src/tests.rs +++ b/core/bin/snapshots_creator/src/tests.rs @@ -25,14 +25,15 @@ use zksync_types::{ use super::*; const TEST_CONFIG: SnapshotsCreatorConfig = SnapshotsCreatorConfig { + version: 1, + l1_batch_number: None, storage_logs_chunk_size: 1_000_000, concurrent_queries_count: 10, object_store: None, }; const SEQUENTIAL_TEST_CONFIG: SnapshotsCreatorConfig = SnapshotsCreatorConfig { - storage_logs_chunk_size: 1_000_000, concurrent_queries_count: 1, - object_store: None, + ..TEST_CONFIG }; #[derive(Debug)] @@ -181,6 +182,7 @@ async fn create_l1_batch( let mut written_keys: Vec<_> = logs_for_initial_writes.iter().map(|log| log.key).collect(); written_keys.sort_unstable(); + let written_keys: Vec<_> = written_keys.iter().map(StorageKey::hashed_key).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(l1_batch_number, &written_keys) .await @@ -241,7 +243,7 @@ async fn prepare_postgres( let (l1_batch_number_of_initial_write, enumeration_index) = expected_l1_batches_and_indices[&log.key.hashed_key()]; SnapshotStorageLog { - key: log.key, + key: log.key.hashed_key(), value: log.value, l1_batch_number_of_initial_write, enumeration_index, @@ -338,6 +340,29 @@ async fn persisting_snapshot_logs() { assert_storage_logs(&*object_store, snapshot_l1_batch_number, &expected_outputs).await; } +#[tokio::test] +async fn persisting_snapshot_logs_with_specified_l1_batch() { + let pool = ConnectionPool::::test_pool().await; + let mut rng = thread_rng(); + let object_store = MockObjectStore::arc(); + let mut conn = pool.connection().await.unwrap(); + let expected_outputs = prepare_postgres(&mut rng, &mut conn, 10).await; + + // L1 batch numbers are intentionally not ordered + for snapshot_l1_batch_number in [7, 1, 4, 6] { + let snapshot_l1_batch_number = L1BatchNumber(snapshot_l1_batch_number); + let mut config = TEST_CONFIG; + config.l1_batch_number = Some(snapshot_l1_batch_number); + + SnapshotCreator::for_tests(object_store.clone(), pool.clone()) + .run(config, MIN_CHUNK_COUNT) + .await + .unwrap(); + + assert_storage_logs(&*object_store, snapshot_l1_batch_number, &expected_outputs).await; + } +} + async fn assert_storage_logs( object_store: &dyn 
ObjectStore, snapshot_l1_batch_number: L1BatchNumber, @@ -350,7 +375,56 @@ async fn assert_storage_logs( chunk_id, }; let chunk: SnapshotStorageLogsChunk = object_store.get(key).await.unwrap(); - actual_logs.extend(chunk.storage_logs.into_iter()); + actual_logs.extend(chunk.storage_logs); + } + let expected_logs: HashSet<_> = expected_outputs + .storage_logs + .iter() + .filter(|log| log.l1_batch_number_of_initial_write <= snapshot_l1_batch_number) + .cloned() + .collect(); + assert_eq!(actual_logs, expected_logs); +} + +#[tokio::test] +async fn persisting_snapshot_logs_for_v0_snapshot() { + let pool = ConnectionPool::::test_pool().await; + let mut rng = thread_rng(); + let object_store = MockObjectStore::arc(); + let mut conn = pool.connection().await.unwrap(); + let expected_outputs = prepare_postgres(&mut rng, &mut conn, 10).await; + + let config = SnapshotsCreatorConfig { + version: 0, + ..TEST_CONFIG + }; + SnapshotCreator::for_tests(object_store.clone(), pool.clone()) + .run(config, MIN_CHUNK_COUNT) + .await + .unwrap(); + let snapshot_l1_batch_number = L1BatchNumber(8); + + // Logs must be compatible with version 1 `SnapshotStorageLog` format + assert_storage_logs(&*object_store, snapshot_l1_batch_number, &expected_outputs).await; + + // ...and must be compatible with version 0 format as well + let mut actual_logs = HashSet::new(); + for chunk_id in 0..MIN_CHUNK_COUNT { + let key = SnapshotStorageLogsStorageKey { + l1_batch_number: snapshot_l1_batch_number, + chunk_id, + }; + let chunk: SnapshotStorageLogsChunk = object_store.get(key).await.unwrap(); + let logs_with_hashed_key = chunk + .storage_logs + .into_iter() + .map(|log| SnapshotStorageLog { + key: log.key.hashed_key(), + value: log.value, + l1_batch_number_of_initial_write: log.l1_batch_number_of_initial_write, + enumeration_index: log.enumeration_index, + }); + actual_logs.extend(logs_with_hashed_key); } assert_eq!(actual_logs, expected_outputs.storage_logs); } @@ -386,12 +460,36 @@ async fn recovery_workflow() { let actual_deps: HashSet<_> = factory_deps.into_iter().collect(); assert_eq!(actual_deps, expected_outputs.deps); - // Process 2 storage log chunks, then stop. + // Check that the creator does nothing unless it's requested to create a new snapshot. SnapshotCreator::for_tests(object_store.clone(), pool.clone()) .stop_after_chunk_count(2) .run(SEQUENTIAL_TEST_CONFIG, MIN_CHUNK_COUNT) .await .unwrap(); + let snapshot_metadata = conn + .snapshots_dal() + .get_snapshot_metadata(snapshot_l1_batch_number) + .await + .unwrap() + .expect("No snapshot metadata"); + assert!( + snapshot_metadata + .storage_logs_filepaths + .iter() + .all(Option::is_none), + "{snapshot_metadata:?}" + ); + + // Process 2 storage log chunks, then stop. + let recovery_config = SnapshotsCreatorConfig { + l1_batch_number: Some(snapshot_l1_batch_number), + ..SEQUENTIAL_TEST_CONFIG + }; + SnapshotCreator::for_tests(object_store.clone(), pool.clone()) + .stop_after_chunk_count(2) + .run(recovery_config.clone(), MIN_CHUNK_COUNT) + .await + .unwrap(); let snapshot_metadata = conn .snapshots_dal() @@ -410,7 +508,7 @@ async fn recovery_workflow() { // Process the remaining chunks. 
SnapshotCreator::for_tests(object_store.clone(), pool.clone()) - .run(SEQUENTIAL_TEST_CONFIG, MIN_CHUNK_COUNT) + .run(recovery_config, MIN_CHUNK_COUNT) .await .unwrap(); @@ -425,13 +523,17 @@ async fn recovery_workflow_with_varying_chunk_size() { let mut conn = pool.connection().await.unwrap(); let expected_outputs = prepare_postgres(&mut rng, &mut conn, 10).await; + // Specifying the snapshot L1 batch right away should work fine. + let snapshot_l1_batch_number = L1BatchNumber(8); + let mut config = SEQUENTIAL_TEST_CONFIG; + config.l1_batch_number = Some(snapshot_l1_batch_number); + SnapshotCreator::for_tests(object_store.clone(), pool.clone()) .stop_after_chunk_count(2) - .run(SEQUENTIAL_TEST_CONFIG, MIN_CHUNK_COUNT) + .run(config.clone(), MIN_CHUNK_COUNT) .await .unwrap(); - let snapshot_l1_batch_number = L1BatchNumber(8); let snapshot_metadata = conn .snapshots_dal() .get_snapshot_metadata(snapshot_l1_batch_number) @@ -447,14 +549,24 @@ async fn recovery_workflow_with_varying_chunk_size() { 2 ); - let config_with_other_size = SnapshotsCreatorConfig { - storage_logs_chunk_size: 1, // << should be ignored - ..SEQUENTIAL_TEST_CONFIG - }; + config.storage_logs_chunk_size = 1; SnapshotCreator::for_tests(object_store.clone(), pool.clone()) - .run(config_with_other_size, MIN_CHUNK_COUNT) + .run(config, MIN_CHUNK_COUNT) .await .unwrap(); assert_storage_logs(&*object_store, snapshot_l1_batch_number, &expected_outputs).await; } + +#[tokio::test] +async fn creator_fails_if_specified_l1_batch_is_missing() { + let pool = ConnectionPool::::test_pool().await; + let object_store = MockObjectStore::arc(); + + let mut config = SEQUENTIAL_TEST_CONFIG; + config.l1_batch_number = Some(L1BatchNumber(20)); + SnapshotCreator::for_tests(object_store.clone(), pool.clone()) + .run(config, MIN_CHUNK_COUNT) + .await + .unwrap_err(); +} diff --git a/core/lib/config/src/configs/snapshot_recovery.rs b/core/lib/config/src/configs/snapshot_recovery.rs index ba26583a8a63..c1d5ea6e3ac6 100644 --- a/core/lib/config/src/configs/snapshot_recovery.rs +++ b/core/lib/config/src/configs/snapshot_recovery.rs @@ -38,6 +38,10 @@ pub struct SnapshotRecoveryConfig { pub enabled: bool, /// L1 batch number of the snapshot to use during recovery. Specifying this parameter is mostly useful for testing. pub l1_batch: Option, + /// Enables dropping storage key preimages when recovering storage logs from a snapshot with version 0. + /// This is a temporary flag that will eventually be removed together with version 0 snapshot support. + #[serde(default)] + pub drop_storage_key_preimages: bool, pub tree: TreeRecoveryConfig, pub postgres: PostgresRecoveryConfig, pub object_store: Option, diff --git a/core/lib/config/src/configs/snapshots_creator.rs b/core/lib/config/src/configs/snapshots_creator.rs index 7d297f607803..c7dc39e41ef5 100644 --- a/core/lib/config/src/configs/snapshots_creator.rs +++ b/core/lib/config/src/configs/snapshots_creator.rs @@ -1,21 +1,34 @@ use serde::Deserialize; +use zksync_basic_types::L1BatchNumber; use crate::ObjectStoreConfig; #[derive(Debug, Clone, PartialEq, Deserialize)] pub struct SnapshotsCreatorConfig { - #[serde(default = "snapshots_creator_storage_logs_chunk_size_default")] + /// Version of snapshots to create. + // Raw integer version is used because `SnapshotVersion` is defined in `zksync_types` crate. + #[serde(default)] + pub version: u16, + /// L1 batch number to create the snapshot for. If not specified, a snapshot will be created + /// for the current penultimate L1 batch. 
+ /// + /// - If a snapshot with this L1 batch already exists and is complete, the creator will do nothing. + /// - If a snapshot with this L1 batch exists and is incomplete, the creator will continue creating it, + /// regardless of whether the specified snapshot `version` matches. + pub l1_batch_number: Option, + #[serde(default = "SnapshotsCreatorConfig::storage_logs_chunk_size_default")] pub storage_logs_chunk_size: u64, - - #[serde(default = "snapshots_creator_concurrent_queries_count")] + #[serde(default = "SnapshotsCreatorConfig::concurrent_queries_count")] pub concurrent_queries_count: u32, pub object_store: Option, } -fn snapshots_creator_storage_logs_chunk_size_default() -> u64 { - 1_000_000 -} +impl SnapshotsCreatorConfig { + const fn storage_logs_chunk_size_default() -> u64 { + 1_000_000 + } -fn snapshots_creator_concurrent_queries_count() -> u32 { - 25 + const fn concurrent_queries_count() -> u32 { + 25 + } } diff --git a/core/lib/config/src/testonly.rs b/core/lib/config/src/testonly.rs index fd1059b0f32f..2c8034dfe9d6 100644 --- a/core/lib/config/src/testonly.rs +++ b/core/lib/config/src/testonly.rs @@ -6,7 +6,7 @@ use zksync_basic_types::{ commitment::L1BatchCommitmentMode, network::Network, protocol_version::{ProtocolSemanticVersion, ProtocolVersionId, VersionPatch}, - L1ChainId, L2ChainId, + L1BatchNumber, L1ChainId, L2ChainId, }; use zksync_consensus_utils::EncodeDist; @@ -641,6 +641,8 @@ impl Distribution for EncodeDist { impl Distribution for EncodeDist { fn sample(&self, rng: &mut R) -> configs::SnapshotsCreatorConfig { configs::SnapshotsCreatorConfig { + l1_batch_number: self.sample_opt(|| L1BatchNumber(rng.gen())), + version: if rng.gen() { 0 } else { 1 }, storage_logs_chunk_size: self.sample(rng), concurrent_queries_count: self.sample(rng), object_store: self.sample(rng), diff --git a/core/lib/dal/.sqlx/query-0385576f1fb3836fc04a6cde3e92c03e1de8292eb0ea1e026ba1b32a3745c261.json b/core/lib/dal/.sqlx/query-0385576f1fb3836fc04a6cde3e92c03e1de8292eb0ea1e026ba1b32a3745c261.json new file mode 100644 index 000000000000..a98cbb18034a --- /dev/null +++ b/core/lib/dal/.sqlx/query-0385576f1fb3836fc04a6cde3e92c03e1de8292eb0ea1e026ba1b32a3745c261.json @@ -0,0 +1,49 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n storage_logs.hashed_key AS \"hashed_key!\",\n storage_logs.value AS \"value!\",\n storage_logs.miniblock_number AS \"miniblock_number!\",\n initial_writes.l1_batch_number AS \"l1_batch_number!\",\n initial_writes.index\n FROM\n (\n SELECT\n hashed_key,\n MAX(ARRAY[miniblock_number, operation_number]::INT[]) AS op\n FROM\n storage_logs\n WHERE\n miniblock_number <= $1\n AND hashed_key >= $3\n AND hashed_key <= $4\n GROUP BY\n hashed_key\n ORDER BY\n hashed_key\n ) AS keys\n INNER JOIN storage_logs ON keys.hashed_key = storage_logs.hashed_key\n AND storage_logs.miniblock_number = keys.op[1]\n AND storage_logs.operation_number = keys.op[2]\n INNER JOIN initial_writes ON keys.hashed_key = initial_writes.hashed_key\n WHERE\n initial_writes.l1_batch_number <= $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "hashed_key!", + "type_info": "Bytea" + }, + { + "ordinal": 1, + "name": "value!", + "type_info": "Bytea" + }, + { + "ordinal": 2, + "name": "miniblock_number!", + "type_info": "Int8" + }, + { + "ordinal": 3, + "name": "l1_batch_number!", + "type_info": "Int8" + }, + { + "ordinal": 4, + "name": "index", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8", + "Bytea", + "Bytea" + ] + }, + "nullable": [ + false, + false, 
+ false, + false, + false + ] + }, + "hash": "0385576f1fb3836fc04a6cde3e92c03e1de8292eb0ea1e026ba1b32a3745c261" +} diff --git a/core/lib/dal/.sqlx/query-21cfb584e3731852e96da1968503208a30b0eead764527ff957ea6e86a34eec6.json b/core/lib/dal/.sqlx/query-21cfb584e3731852e96da1968503208a30b0eead764527ff957ea6e86a34eec6.json index 6d78d4ebd2f0..541af15fa271 100644 --- a/core/lib/dal/.sqlx/query-21cfb584e3731852e96da1968503208a30b0eead764527ff957ea6e86a34eec6.json +++ b/core/lib/dal/.sqlx/query-21cfb584e3731852e96da1968503208a30b0eead764527ff957ea6e86a34eec6.json @@ -39,8 +39,8 @@ }, "nullable": [ false, - false, - false, + true, + true, false, false, false diff --git a/core/lib/dal/.sqlx/query-89d58c9735adbd9f40791d61bd63a0a2691a4b3238fce9dbc3a7d2861a4ca967.json b/core/lib/dal/.sqlx/query-89d58c9735adbd9f40791d61bd63a0a2691a4b3238fce9dbc3a7d2861a4ca967.json new file mode 100644 index 000000000000..b65b57e4e01e --- /dev/null +++ b/core/lib/dal/.sqlx/query-89d58c9735adbd9f40791d61bd63a0a2691a4b3238fce9dbc3a7d2861a4ca967.json @@ -0,0 +1,34 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n address AS \"address!\",\n key AS \"key!\",\n value\n FROM\n storage_logs\n WHERE\n miniblock_number BETWEEN (\n SELECT\n MIN(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n ) AND (\n SELECT\n MAX(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n )\n ORDER BY\n miniblock_number,\n operation_number\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "address!", + "type_info": "Bytea" + }, + { + "ordinal": 1, + "name": "key!", + "type_info": "Bytea" + }, + { + "ordinal": 2, + "name": "value", + "type_info": "Bytea" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + true, + true, + false + ] + }, + "hash": "89d58c9735adbd9f40791d61bd63a0a2691a4b3238fce9dbc3a7d2861a4ca967" +} diff --git a/core/lib/dal/.sqlx/query-d6b70256793417a949081899eccf75260c7afaf110870656061a04079c35c2d8.json b/core/lib/dal/.sqlx/query-9f29aa31d4698031e9f3fe2eb273724dcce382936af0d4c386143399995cd325.json similarity index 87% rename from core/lib/dal/.sqlx/query-d6b70256793417a949081899eccf75260c7afaf110870656061a04079c35c2d8.json rename to core/lib/dal/.sqlx/query-9f29aa31d4698031e9f3fe2eb273724dcce382936af0d4c386143399995cd325.json index c116d2d7de6e..2e1bf7c3e61c 100644 --- a/core/lib/dal/.sqlx/query-d6b70256793417a949081899eccf75260c7afaf110870656061a04079c35c2d8.json +++ b/core/lib/dal/.sqlx/query-9f29aa31d4698031e9f3fe2eb273724dcce382936af0d4c386143399995cd325.json @@ -1,21 +1,21 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n storage_logs.key AS \"key!\",\n storage_logs.value AS \"value!\",\n storage_logs.address AS \"address!\",\n storage_logs.miniblock_number AS \"miniblock_number!\",\n initial_writes.l1_batch_number AS \"l1_batch_number!\",\n initial_writes.index\n FROM\n (\n SELECT\n hashed_key,\n MAX(ARRAY[miniblock_number, operation_number]::INT[]) AS op\n FROM\n storage_logs\n WHERE\n miniblock_number <= $1\n AND hashed_key >= $3\n AND hashed_key <= $4\n GROUP BY\n hashed_key\n ORDER BY\n hashed_key\n ) AS keys\n INNER JOIN storage_logs ON keys.hashed_key = storage_logs.hashed_key\n AND storage_logs.miniblock_number = keys.op[1]\n AND storage_logs.operation_number = keys.op[2]\n INNER JOIN initial_writes ON keys.hashed_key = initial_writes.hashed_key\n WHERE\n initial_writes.l1_batch_number <= $2\n ", + "query": "\n SELECT\n storage_logs.address AS \"address!\",\n storage_logs.key AS \"key!\",\n storage_logs.value AS \"value!\",\n storage_logs.miniblock_number 
AS \"miniblock_number!\",\n initial_writes.l1_batch_number AS \"l1_batch_number!\",\n initial_writes.index\n FROM\n (\n SELECT\n hashed_key,\n MAX(ARRAY[miniblock_number, operation_number]::INT[]) AS op\n FROM\n storage_logs\n WHERE\n miniblock_number <= $1\n AND hashed_key >= $3\n AND hashed_key <= $4\n GROUP BY\n hashed_key\n ORDER BY\n hashed_key\n ) AS keys\n INNER JOIN storage_logs ON keys.hashed_key = storage_logs.hashed_key\n AND storage_logs.miniblock_number = keys.op[1]\n AND storage_logs.operation_number = keys.op[2]\n INNER JOIN initial_writes ON keys.hashed_key = initial_writes.hashed_key\n WHERE\n initial_writes.l1_batch_number <= $2\n ", "describe": { "columns": [ { "ordinal": 0, - "name": "key!", + "name": "address!", "type_info": "Bytea" }, { "ordinal": 1, - "name": "value!", + "name": "key!", "type_info": "Bytea" }, { "ordinal": 2, - "name": "address!", + "name": "value!", "type_info": "Bytea" }, { @@ -43,13 +43,13 @@ ] }, "nullable": [ - false, - false, + true, + true, false, false, false, false ] }, - "hash": "d6b70256793417a949081899eccf75260c7afaf110870656061a04079c35c2d8" + "hash": "9f29aa31d4698031e9f3fe2eb273724dcce382936af0d4c386143399995cd325" } diff --git a/core/lib/dal/.sqlx/query-be092376ee3aec298f8b22229abf6552b86d46808fe219c55a5210af56cce2ee.json b/core/lib/dal/.sqlx/query-be092376ee3aec298f8b22229abf6552b86d46808fe219c55a5210af56cce2ee.json new file mode 100644 index 000000000000..82c544631339 --- /dev/null +++ b/core/lib/dal/.sqlx/query-be092376ee3aec298f8b22229abf6552b86d46808fe219c55a5210af56cce2ee.json @@ -0,0 +1,28 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n hashed_key,\n value\n FROM\n storage_logs\n WHERE\n miniblock_number BETWEEN (\n SELECT\n MIN(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n ) AND (\n SELECT\n MAX(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n )\n ORDER BY\n miniblock_number,\n operation_number\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "hashed_key", + "type_info": "Bytea" + }, + { + "ordinal": 1, + "name": "value", + "type_info": "Bytea" + } + ], + "parameters": { + "Left": [ + "Int8" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "be092376ee3aec298f8b22229abf6552b86d46808fe219c55a5210af56cce2ee" +} diff --git a/core/lib/dal/.sqlx/query-c36abacc705a2244d423599779e38d60d6e93bcb34fd20422e227714fccbf6b7.json b/core/lib/dal/.sqlx/query-c36abacc705a2244d423599779e38d60d6e93bcb34fd20422e227714fccbf6b7.json deleted file mode 100644 index ea4b266d8259..000000000000 --- a/core/lib/dal/.sqlx/query-c36abacc705a2244d423599779e38d60d6e93bcb34fd20422e227714fccbf6b7.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n address,\n key,\n value\n FROM\n storage_logs\n WHERE\n miniblock_number BETWEEN (\n SELECT\n MIN(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n ) AND (\n SELECT\n MAX(number)\n FROM\n miniblocks\n WHERE\n l1_batch_number = $1\n )\n ORDER BY\n miniblock_number,\n operation_number\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "address", - "type_info": "Bytea" - }, - { - "ordinal": 1, - "name": "key", - "type_info": "Bytea" - }, - { - "ordinal": 2, - "name": "value", - "type_info": "Bytea" - } - ], - "parameters": { - "Left": [ - "Int8" - ] - }, - "nullable": [ - false, - false, - false - ] - }, - "hash": "c36abacc705a2244d423599779e38d60d6e93bcb34fd20422e227714fccbf6b7" -} diff --git a/core/lib/dal/README.md b/core/lib/dal/README.md index 59f4401924ee..cc247733467d 100644 --- 
a/core/lib/dal/README.md +++ b/core/lib/dal/README.md @@ -83,6 +83,12 @@ invariants are expected to be upheld: - L2 blocks and L1 batches present in the DB form a continuous range of numbers. If a DB is recovered from a node snapshot, the first L2 block / L1 batch is **the next one** after the snapshot L2 block / L1 batch mentioned in the `snapshot_recovery` table. Otherwise, L2 blocks / L1 batches must start from number 0 (aka genesis). +- `address` and `key` fields in the `storage_logs` table are not null for all blocks executed on the node (i.e., blocks + the header of which is present in `miniblocks`). On the other hand, `address` and `key` fields may be null for + snapshot storage logs. These fields are needed for some components post-processing L1 batches, such as the Merkle tree + and the commitment generator. Both use `(address, key)` tuples to sort logs in a batch to get canonical ordering. + Since a snapshot is not post-processed in such a way, it is acceptable to skip them for the snapshot logs (and only + for them). ## Contributing to DAL diff --git a/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.down.sql b/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.down.sql new file mode 100644 index 000000000000..4348c29caef5 --- /dev/null +++ b/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.down.sql @@ -0,0 +1,3 @@ +ALTER TABLE storage_logs + ALTER COLUMN address SET NOT NULL, + ALTER COLUMN key SET NOT NULL; diff --git a/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.up.sql b/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.up.sql new file mode 100644 index 000000000000..18a623c67f56 --- /dev/null +++ b/core/lib/dal/migrations/20240617103351_make_key_preimage_nullable.up.sql @@ -0,0 +1,3 @@ +ALTER TABLE storage_logs + ALTER COLUMN address DROP NOT NULL, + ALTER COLUMN key DROP NOT NULL; diff --git a/core/lib/dal/src/models/storage_log.rs b/core/lib/dal/src/models/storage_log.rs index ef3a018f9e46..055f37cde559 100644 --- a/core/lib/dal/src/models/storage_log.rs +++ b/core/lib/dal/src/models/storage_log.rs @@ -12,8 +12,8 @@ pub struct DbInitialWrite { #[derive(Debug, PartialEq)] pub struct DbStorageLog { pub hashed_key: H256, - pub address: H160, - pub key: H256, + pub address: Option, + pub key: Option, pub value: H256, pub operation_number: u64, pub l2_block_number: L2BlockNumber, diff --git a/core/lib/dal/src/snapshots_creator_dal.rs b/core/lib/dal/src/snapshots_creator_dal.rs index fef3ee5b7198..b076240173b1 100644 --- a/core/lib/dal/src/snapshots_creator_dal.rs +++ b/core/lib/dal/src/snapshots_creator_dal.rs @@ -55,9 +55,74 @@ impl SnapshotsCreatorDal<'_, '_> { let storage_logs = sqlx::query!( r#" SELECT - storage_logs.key AS "key!", + storage_logs.hashed_key AS "hashed_key!", storage_logs.value AS "value!", + storage_logs.miniblock_number AS "miniblock_number!", + initial_writes.l1_batch_number AS "l1_batch_number!", + initial_writes.index + FROM + ( + SELECT + hashed_key, + MAX(ARRAY[miniblock_number, operation_number]::INT[]) AS op + FROM + storage_logs + WHERE + miniblock_number <= $1 + AND hashed_key >= $3 + AND hashed_key <= $4 + GROUP BY + hashed_key + ORDER BY + hashed_key + ) AS keys + INNER JOIN storage_logs ON keys.hashed_key = storage_logs.hashed_key + AND storage_logs.miniblock_number = keys.op[1] + AND storage_logs.operation_number = keys.op[2] + INNER JOIN initial_writes ON keys.hashed_key = initial_writes.hashed_key + WHERE + initial_writes.l1_batch_number <= $2 + "#, + 
i64::from(l2_block_number.0), + i64::from(l1_batch_number.0), + hashed_keys_range.start().as_bytes(), + hashed_keys_range.end().as_bytes() + ) + .instrument("get_storage_logs_chunk") + .with_arg("l2_block_number", &l2_block_number) + .with_arg("min_hashed_key", &hashed_keys_range.start()) + .with_arg("max_hashed_key", &hashed_keys_range.end()) + .report_latency() + .expect_slow_query() + .fetch_all(self.storage) + .await? + .iter() + .map(|row| SnapshotStorageLog { + key: H256::from_slice(&row.hashed_key), + value: H256::from_slice(&row.value), + l1_batch_number_of_initial_write: L1BatchNumber(row.l1_batch_number as u32), + enumeration_index: row.index as u64, + }) + .collect(); + Ok(storage_logs) + } + + /// Same as [`Self::get_storage_logs_chunk()`], but returns full keys. + #[deprecated( + note = "will fail if called on a node restored from a v1 snapshot; use `get_storage_logs_chunk()` instead" + )] + pub async fn get_storage_logs_chunk_with_key_preimages( + &mut self, + l2_block_number: L2BlockNumber, + l1_batch_number: L1BatchNumber, + hashed_keys_range: std::ops::RangeInclusive, + ) -> DalResult>> { + let storage_logs = sqlx::query!( + r#" + SELECT storage_logs.address AS "address!", + storage_logs.key AS "key!", + storage_logs.value AS "value!", storage_logs.miniblock_number AS "miniblock_number!", initial_writes.l1_batch_number AS "l1_batch_number!", initial_writes.index @@ -169,6 +234,7 @@ mod tests { .unwrap(); let mut written_keys: Vec<_> = logs.iter().map(|log| log.key).collect(); written_keys.sort_unstable(); + let written_keys: Vec<_> = written_keys.iter().map(StorageKey::hashed_key).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(1), &written_keys) .await @@ -190,7 +256,7 @@ mod tests { ); StorageLog::new_write_log(key, H256::repeat_byte(1)) }); - let new_written_keys: Vec<_> = new_logs.clone().map(|log| log.key).collect(); + let new_written_keys: Vec<_> = new_logs.clone().map(|log| log.key.hashed_key()).collect(); let updated_logs = logs.iter().step_by(3).map(|&log| StorageLog { value: H256::repeat_byte(23), ..log @@ -238,7 +304,7 @@ mod tests { .unwrap(); assert_eq!(all_logs.len(), expected_logs.len()); for (log, expected_log) in all_logs.iter().zip(expected_logs) { - assert_eq!(log.key, expected_log.key); + assert_eq!(log.key, expected_log.key.hashed_key()); assert_eq!(log.value, expected_log.value); assert_eq!(log.l1_batch_number_of_initial_write, l1_batch_number); } @@ -253,7 +319,7 @@ mod tests { .unwrap(); assert_eq!(logs.len(), chunk.len()); for (log, expected_log) in logs.iter().zip(chunk) { - assert_eq!(log.key, expected_log.key); + assert_eq!(log.key, expected_log.key.hashed_key()); assert_eq!(log.value, expected_log.value); } } @@ -282,7 +348,7 @@ mod tests { .await .unwrap(); conn.storage_logs_dedup_dal() - .insert_initial_writes(L1BatchNumber(2), &[key]) + .insert_initial_writes(L1BatchNumber(2), &[key.hashed_key()]) .await .unwrap(); @@ -307,7 +373,7 @@ mod tests { .await .unwrap(); assert_eq!(logs.len(), 1); - assert_eq!(logs[0].key, key); + assert_eq!(logs[0].key, key.hashed_key()); assert_eq!(logs[0].value, real_write.value); assert_eq!(logs[0].l1_batch_number_of_initial_write, L1BatchNumber(2)); } diff --git a/core/lib/dal/src/storage_logs_dal.rs b/core/lib/dal/src/storage_logs_dal.rs index 052e93370333..d5de66037b49 100644 --- a/core/lib/dal/src/storage_logs_dal.rs +++ b/core/lib/dal/src/storage_logs_dal.rs @@ -72,10 +72,11 @@ impl StorageLogsDal<'_, '_> { copy.send(buffer.as_bytes()).await } - pub async fn 
insert_storage_logs_from_snapshot( + #[deprecated(note = "Will be removed in favor of `insert_storage_logs_from_snapshot()`")] + pub async fn insert_storage_logs_with_preimages_from_snapshot( &mut self, l2_block_number: L2BlockNumber, - snapshot_storage_logs: &[SnapshotStorageLog], + snapshot_storage_logs: &[SnapshotStorageLog], ) -> DalResult<()> { let storage_logs_len = snapshot_storage_logs.len(); let copy = CopyStatement::new( @@ -112,6 +113,44 @@ impl StorageLogsDal<'_, '_> { copy.send(buffer.as_bytes()).await } + pub async fn insert_storage_logs_from_snapshot( + &mut self, + l2_block_number: L2BlockNumber, + snapshot_storage_logs: &[SnapshotStorageLog], + ) -> DalResult<()> { + let storage_logs_len = snapshot_storage_logs.len(); + let copy = CopyStatement::new( + "COPY storage_logs( + hashed_key, value, operation_number, tx_hash, miniblock_number, + created_at, updated_at + ) + FROM STDIN WITH (DELIMITER '|')", + ) + .instrument("insert_storage_logs_from_snapshot") + .with_arg("l2_block_number", &l2_block_number) + .with_arg("storage_logs.len", &storage_logs_len) + .start(self.storage) + .await?; + + let mut buffer = String::new(); + let now = Utc::now().naive_utc().to_string(); + for log in snapshot_storage_logs.iter() { + write_str!( + &mut buffer, + r"\\x{hashed_key:x}|\\x{value:x}|", + hashed_key = log.key, + value = log.value + ); + writeln_str!( + &mut buffer, + r"{}|\\x{:x}|{l2_block_number}|{now}|{now}", + log.enumeration_index, + H256::zero() + ); + } + copy.send(buffer.as_bytes()).await + } + pub async fn append_storage_logs( &mut self, block_number: L2BlockNumber, @@ -299,17 +338,16 @@ impl StorageLogsDal<'_, '_> { Ok(deployment_data.collect()) } - /// Returns latest values for all [`StorageKey`]s written to in the specified L1 batch + /// Returns latest values for all slots written to in the specified L1 batch /// judging by storage logs (i.e., not taking deduplication logic into account). pub async fn get_touched_slots_for_l1_batch( &mut self, l1_batch_number: L1BatchNumber, - ) -> DalResult> { + ) -> DalResult> { let rows = sqlx::query!( r#" SELECT - address, - key, + hashed_key, value FROM storage_logs @@ -340,6 +378,57 @@ impl StorageLogsDal<'_, '_> { .fetch_all(self.storage) .await?; + let touched_slots = rows.into_iter().map(|row| { + ( + H256::from_slice(&row.hashed_key), + H256::from_slice(&row.value), + ) + }); + Ok(touched_slots.collect()) + } + + /// Same as [`Self::get_touched_slots_for_l1_batch()`], but loads key preimages instead of hashed keys. + /// Correspondingly, this method is safe to call for locally executed L1 batches, for which key preimages + /// are known; otherwise, it will error. 
+ pub async fn get_touched_slots_for_executed_l1_batch( + &mut self, + l1_batch_number: L1BatchNumber, + ) -> DalResult> { + let rows = sqlx::query!( + r#" + SELECT + address AS "address!", + key AS "key!", + value + FROM + storage_logs + WHERE + miniblock_number BETWEEN ( + SELECT + MIN(number) + FROM + miniblocks + WHERE + l1_batch_number = $1 + ) AND ( + SELECT + MAX(number) + FROM + miniblocks + WHERE + l1_batch_number = $1 + ) + ORDER BY + miniblock_number, + operation_number + "#, + i64::from(l1_batch_number.0) + ) + .instrument("get_touched_slots_for_executed_l1_batch") + .with_arg("l1_batch_number", &l1_batch_number) + .fetch_all(self.storage) + .await?; + let touched_slots = rows.into_iter().map(|row| { let key = StorageKey::new( AccountTreeId::new(Address::from_slice(&row.address)), @@ -578,8 +667,8 @@ impl StorageLogsDal<'_, '_> { rows.into_iter() .map(|row| DbStorageLog { hashed_key: H256::from_slice(&row.hashed_key), - address: H160::from_slice(&row.address), - key: H256::from_slice(&row.key), + address: row.address.as_deref().map(H160::from_slice), + key: row.key.as_deref().map(H256::from_slice), value: H256::from_slice(&row.value), operation_number: row.operation_number as u64, l2_block_number: L2BlockNumber(row.miniblock_number as u32), @@ -720,7 +809,9 @@ impl StorageLogsDal<'_, '_> { #[cfg(test)] mod tests { use zksync_contracts::BaseSystemContractsHashes; - use zksync_types::{block::L1BatchHeader, ProtocolVersion, ProtocolVersionId}; + use zksync_types::{ + block::L1BatchHeader, AccountTreeId, ProtocolVersion, ProtocolVersionId, StorageKey, + }; use super::*; use crate::{tests::create_l2_block_header, ConnectionPool, Core}; @@ -773,8 +864,11 @@ mod tests { .await .unwrap(); assert_eq!(touched_slots.len(), 2); - assert_eq!(touched_slots[&first_key], H256::repeat_byte(1)); - assert_eq!(touched_slots[&second_key], H256::repeat_byte(2)); + assert_eq!(touched_slots[&first_key.hashed_key()], H256::repeat_byte(1)); + assert_eq!( + touched_slots[&second_key.hashed_key()], + H256::repeat_byte(2) + ); // Add more logs and check log ordering. let third_log = StorageLog::new_write_log(first_key, H256::repeat_byte(3)); @@ -790,8 +884,11 @@ mod tests { .await .unwrap(); assert_eq!(touched_slots.len(), 2); - assert_eq!(touched_slots[&first_key], H256::repeat_byte(3)); - assert_eq!(touched_slots[&second_key], H256::repeat_byte(2)); + assert_eq!(touched_slots[&first_key.hashed_key()], H256::repeat_byte(3)); + assert_eq!( + touched_slots[&second_key.hashed_key()], + H256::repeat_byte(2) + ); test_revert(&mut conn, first_key, second_key).await; } @@ -861,7 +958,7 @@ mod tests { }) .collect(); insert_l2_block(&mut conn, 1, logs.clone()).await; - let written_keys: Vec<_> = logs.iter().map(|log| log.key).collect(); + let written_keys: Vec<_> = logs.iter().map(|log| log.key.hashed_key()).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(1), &written_keys) .await @@ -874,7 +971,10 @@ mod tests { }) .collect(); insert_l2_block(&mut conn, 2, new_logs.clone()).await; - let new_written_keys: Vec<_> = new_logs[5..].iter().map(|log| log.key).collect(); + let new_written_keys: Vec<_> = new_logs[5..] 
+ .iter() + .map(|log| log.key.hashed_key()) + .collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(2), &new_written_keys) .await @@ -931,8 +1031,9 @@ mod tests { let initial_keys: Vec<_> = logs .iter() .filter_map(|log| { - (!log.value.is_zero() && !non_initial.contains(&log.key.hashed_key())) - .then_some(log.key) + let hashed_key = log.key.hashed_key(); + (!log.value.is_zero() && !non_initial.contains(&hashed_key)) + .then_some(hashed_key) }) .collect(); @@ -1016,6 +1117,7 @@ mod tests { let mut initial_keys: Vec<_> = logs.iter().map(|log| log.key).collect(); initial_keys.sort_unstable(); + let initial_keys: Vec<_> = initial_keys.iter().map(StorageKey::hashed_key).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(1), &initial_keys) .await diff --git a/core/lib/dal/src/storage_logs_dedup_dal.rs b/core/lib/dal/src/storage_logs_dedup_dal.rs index 6df54c54fc51..02049f3e9ade 100644 --- a/core/lib/dal/src/storage_logs_dedup_dal.rs +++ b/core/lib/dal/src/storage_logs_dedup_dal.rs @@ -68,14 +68,10 @@ impl StorageLogsDedupDal<'_, '_> { let mut bytes: Vec = Vec::new(); let now = Utc::now().naive_utc().to_string(); - for log in snapshot_storage_logs.iter() { + for log in snapshot_storage_logs { let row = format!( "\\\\x{:x}|{}|{}|{}|{}\n", - log.key.hashed_key(), - log.enumeration_index, - log.l1_batch_number_of_initial_write, - now, - now, + log.key, log.enumeration_index, log.l1_batch_number_of_initial_write, now, now, ); bytes.extend_from_slice(row.as_bytes()); } @@ -85,12 +81,9 @@ impl StorageLogsDedupDal<'_, '_> { pub async fn insert_initial_writes( &mut self, l1_batch_number: L1BatchNumber, - written_storage_keys: &[StorageKey], + written_hashed_keys: &[H256], ) -> DalResult<()> { - let hashed_keys: Vec<_> = written_storage_keys - .iter() - .map(|key| StorageKey::raw_hashed_key(key.address(), key.key()).to_vec()) - .collect(); + let hashed_keys: Vec<_> = written_hashed_keys.iter().map(H256::as_bytes).collect(); let last_index = self.max_enumeration_index().await?.unwrap_or(0); let indices: Vec<_> = ((last_index + 1)..=(last_index + hashed_keys.len() as u64)) @@ -110,7 +103,7 @@ impl StorageLogsDedupDal<'_, '_> { FROM UNNEST($1::bytea[], $2::BIGINT[]) AS u (hashed_key, INDEX) "#, - &hashed_keys, + &hashed_keys as &[&[u8]], &indices, i64::from(l1_batch_number.0) ) @@ -343,8 +336,8 @@ mod tests { let account = AccountTreeId::new(Address::repeat_byte(1)); let initial_writes = [ - StorageKey::new(account, H256::zero()), - StorageKey::new(account, H256::repeat_byte(1)), + StorageKey::new(account, H256::zero()).hashed_key(), + StorageKey::new(account, H256::repeat_byte(1)).hashed_key(), ]; conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(0), &initial_writes) @@ -359,8 +352,8 @@ mod tests { assert_eq!(max_index, Some(2)); let initial_writes = [ - StorageKey::new(account, H256::repeat_byte(2)), - StorageKey::new(account, H256::repeat_byte(3)), + StorageKey::new(account, H256::repeat_byte(2)).hashed_key(), + StorageKey::new(account, H256::repeat_byte(3)).hashed_key(), ]; conn.storage_logs_dedup_dal() .insert_initial_writes(L1BatchNumber(1), &initial_writes) diff --git a/core/lib/dal/src/storage_web3_dal.rs b/core/lib/dal/src/storage_web3_dal.rs index 843752360eff..f54ac766ee8c 100644 --- a/core/lib/dal/src/storage_web3_dal.rs +++ b/core/lib/dal/src/storage_web3_dal.rs @@ -28,7 +28,7 @@ impl StorageWeb3Dal<'_, '_> { ) -> DalResult { let nonce_key = get_nonce_key(&address); let nonce_value = self - 
.get_historical_value_unchecked(&nonce_key, block_number) + .get_historical_value_unchecked(nonce_key.hashed_key(), block_number) .await?; let full_nonce = h256_to_u256(nonce_value); Ok(decompose_full_nonce(full_nonce).0) @@ -66,13 +66,14 @@ impl StorageWeb3Dal<'_, '_> { ) -> DalResult { let key = storage_key_for_standard_token_balance(token_id, account_id.address()); let balance = self - .get_historical_value_unchecked(&key, block_number) + .get_historical_value_unchecked(key.hashed_key(), block_number) .await?; Ok(h256_to_u256(balance)) } /// Gets the current value for the specified `key`. Uses state of the latest sealed L2 block. /// Returns error if there is no sealed L2 blocks. + // FIXME: propagate hashed_key? pub async fn get_value(&mut self, key: &StorageKey) -> DalResult { let Some(l2_block_number) = self .storage @@ -85,7 +86,7 @@ impl StorageWeb3Dal<'_, '_> { .constraint_error(anyhow::anyhow!("no sealed l2 blocks")); return Err(err); }; - self.get_historical_value_unchecked(key, l2_block_number) + self.get_historical_value_unchecked(key.hashed_key(), l2_block_number) .await } @@ -119,11 +120,9 @@ impl StorageWeb3Dal<'_, '_> { /// It will return the current value if the block is in the future. pub async fn get_historical_value_unchecked( &mut self, - key: &StorageKey, + hashed_key: H256, block_number: L2BlockNumber, ) -> DalResult { - let hashed_key = key.hashed_key(); - sqlx::query!( r#" SELECT @@ -204,9 +203,8 @@ impl StorageWeb3Dal<'_, '_> { pub async fn get_l1_batch_number_for_initial_write( &mut self, - key: &StorageKey, + hashed_key: H256, ) -> DalResult> { - let hashed_key = key.hashed_key(); let row = sqlx::query!( r#" SELECT diff --git a/core/lib/merkle_tree/src/domain.rs b/core/lib/merkle_tree/src/domain.rs index ffc4b0b84106..5cb53355765c 100644 --- a/core/lib/merkle_tree/src/domain.rs +++ b/core/lib/merkle_tree/src/domain.rs @@ -15,6 +15,20 @@ use crate::{ BlockOutput, HashTree, MerkleTree, MerkleTreePruner, MerkleTreePrunerHandle, NoVersionError, }; +impl TreeInstruction { + /// Maps the key preimage in this instruction to a hashed key used by the Merkle tree. + pub fn with_hashed_key(self) -> TreeInstruction { + match self { + Self::Read(key) => TreeInstruction::Read(key.hashed_key_u256()), + Self::Write(entry) => TreeInstruction::Write(TreeEntry { + key: entry.key.hashed_key_u256(), + value: entry.value, + leaf_index: entry.leaf_index, + }), + } + } +} + /// Metadata for the current tree state. #[derive(Debug, Clone)] pub struct TreeMetadata { @@ -63,18 +77,13 @@ impl ZkSyncTree { /// Returns metadata based on `storage_logs` generated by the genesis L1 batch. This does not /// create a persistent tree. #[allow(clippy::missing_panics_doc)] // false positive - pub fn process_genesis_batch(storage_logs: &[TreeInstruction]) -> BlockOutput { + pub fn process_genesis_batch(storage_logs: &[TreeInstruction]) -> BlockOutput { let kvs = Self::filter_write_instructions(storage_logs); tracing::info!( - "Creating Merkle tree for genesis batch with {instr_count} writes", + "Creating Merkle tree for genesis batch with {instr_count} writes", instr_count = kvs.len() ); - let kvs: Vec<_> = kvs - .iter() - .map(|instr| instr.map_key(StorageKey::hashed_key_u256)) - .collect(); - // `unwrap()`s are safe: in-memory trees never raise I/O errors let mut in_memory_tree = MerkleTree::new(PatchSet::default()).unwrap(); let output = in_memory_tree.extend(kvs).unwrap(); @@ -212,7 +221,7 @@ impl ZkSyncTree { /// Proxies database I/O errors. 
pub fn process_l1_batch( &mut self, - storage_logs: &[TreeInstruction], + storage_logs: &[TreeInstruction], ) -> anyhow::Result { match self.mode { TreeMode::Full => self.process_l1_batch_full(storage_logs), @@ -222,26 +231,21 @@ impl ZkSyncTree { fn process_l1_batch_full( &mut self, - instructions: &[TreeInstruction], + instructions: &[TreeInstruction], ) -> anyhow::Result { let l1_batch_number = self.next_l1_batch_number(); let starting_leaf_count = self.tree.latest_root().leaf_count(); let starting_root_hash = self.tree.latest_root_hash(); - let instructions_with_hashed_keys: Vec<_> = instructions - .iter() - .map(|instr| instr.map_key(StorageKey::hashed_key_u256)) - .collect(); - tracing::info!( "Extending Merkle tree with batch #{l1_batch_number} with {instr_count} ops in full mode", instr_count = instructions.len() ); let output = if let Some(thread_pool) = &self.thread_pool { - thread_pool.install(|| self.tree.extend_with_proofs(instructions_with_hashed_keys)) + thread_pool.install(|| self.tree.extend_with_proofs(instructions.to_vec())) } else { - self.tree.extend_with_proofs(instructions_with_hashed_keys) + self.tree.extend_with_proofs(instructions.to_vec()) }?; let mut witness = PrepareBasicCircuitsJob::new(starting_leaf_count + 1); @@ -265,7 +269,7 @@ impl ZkSyncTree { is_write: !log.base.is_read(), first_write: matches!(log.base, TreeLogEntry::Inserted), merkle_paths, - leaf_hashed_key: instruction.key().hashed_key_u256(), + leaf_hashed_key: instruction.key(), leaf_enumeration_index: match instruction { TreeInstruction::Write(entry) => entry.leaf_index, TreeInstruction::Read(_) => match log.base { @@ -307,7 +311,7 @@ impl ZkSyncTree { fn process_l1_batch_lightweight( &mut self, - instructions: &[TreeInstruction], + instructions: &[TreeInstruction], ) -> anyhow::Result { let kvs = Self::filter_write_instructions(instructions); let l1_batch_number = self.next_l1_batch_number(); @@ -317,15 +321,10 @@ impl ZkSyncTree { kv_count = kvs.len() ); - let kvs_with_derived_key: Vec<_> = kvs - .iter() - .map(|entry| entry.map_key(StorageKey::hashed_key_u256)) - .collect(); - let output = if let Some(thread_pool) = &self.thread_pool { - thread_pool.install(|| self.tree.extend(kvs_with_derived_key.clone())) + thread_pool.install(|| self.tree.extend(kvs)) } else { - self.tree.extend(kvs_with_derived_key.clone()) + self.tree.extend(kvs) }?; tracing::info!( @@ -342,9 +341,7 @@ impl ZkSyncTree { }) } - fn filter_write_instructions( - instructions: &[TreeInstruction], - ) -> Vec> { + fn filter_write_instructions(instructions: &[TreeInstruction]) -> Vec { let kvs = instructions .iter() .filter_map(|instruction| match instruction { diff --git a/core/lib/merkle_tree/src/types/mod.rs b/core/lib/merkle_tree/src/types/mod.rs index bd59099a3a65..807ae0238769 100644 --- a/core/lib/merkle_tree/src/types/mod.rs +++ b/core/lib/merkle_tree/src/types/mod.rs @@ -38,13 +38,6 @@ impl TreeInstruction { Self::Write(entry) => entry.key, } } - - pub(crate) fn map_key(&self, map_fn: impl FnOnce(&K) -> U) -> TreeInstruction { - match self { - Self::Read(key) => TreeInstruction::Read(map_fn(key)), - Self::Write(entry) => TreeInstruction::Write(entry.map_key(map_fn)), - } - } } /// Entry in a Merkle tree associated with a key. 
@@ -77,10 +70,6 @@ impl TreeEntry { leaf_index, } } - - pub(crate) fn map_key(&self, map_fn: impl FnOnce(&K) -> U) -> TreeEntry { - TreeEntry::new(map_fn(&self.key), self.leaf_index, self.value) - } } impl TreeEntry { diff --git a/core/lib/merkle_tree/tests/integration/domain.rs b/core/lib/merkle_tree/tests/integration/domain.rs index db5accf30a6b..85b761f7b4b8 100644 --- a/core/lib/merkle_tree/tests/integration/domain.rs +++ b/core/lib/merkle_tree/tests/integration/domain.rs @@ -12,7 +12,7 @@ use zksync_storage::RocksDB; use zksync_system_constants::ACCOUNT_CODE_STORAGE_ADDRESS; use zksync_types::{AccountTreeId, Address, L1BatchNumber, StorageKey, H256}; -fn gen_storage_logs() -> Vec> { +fn gen_storage_logs() -> Vec { let addrs = vec![ "4b3af74f66ab1f0da3f2e4ec7a3cb99baf1af7b2", "ef4bb7b21c5fe7432a7d63876cc59ecc23b46636", @@ -32,7 +32,7 @@ fn gen_storage_logs() -> Vec> { .zip(proof_values) .enumerate() .map(|(i, (proof_key, proof_value))| { - let entry = TreeEntry::new(proof_key, i as u64 + 1, proof_value); + let entry = TreeEntry::new(proof_key.hashed_key_u256(), i as u64 + 1, proof_value); TreeInstruction::Write(entry) }) .collect() @@ -171,11 +171,12 @@ fn revert_blocks() { // Produce 4 blocks with distinct values and 1 block with modified values from first block let block_size: usize = 25; let address: Address = "4b3af74f66ab1f0da3f2e4ec7a3cb99baf1af7b2".parse().unwrap(); - let proof_keys = (0..100) - .map(move |i| StorageKey::new(AccountTreeId::new(address), H256::from_low_u64_be(i))); + let proof_keys = (0..100).map(move |i| { + StorageKey::new(AccountTreeId::new(address), H256::from_low_u64_be(i)).hashed_key_u256() + }); let proof_values = (0..100).map(H256::from_low_u64_be); - // Add couple of blocks of distinct keys/values + // Add a couple of blocks of distinct keys/values let mut logs: Vec<_> = proof_keys .zip(proof_values) .map(|(proof_key, proof_value)| { @@ -185,7 +186,8 @@ fn revert_blocks() { .collect(); // Add a block with repeated keys let extra_logs = (0..block_size).map(move |i| { - let key = StorageKey::new(AccountTreeId::new(address), H256::from_low_u64_be(i as u64)); + let key = StorageKey::new(AccountTreeId::new(address), H256::from_low_u64_be(i as u64)) + .hashed_key_u256(); let entry = TreeEntry::new(key, i as u64 + 1, H256::from_low_u64_be(i as u64 + 1)); TreeInstruction::Write(entry) }); @@ -317,9 +319,13 @@ fn create_write_log( address: Address, address_storage_key: [u8; 32], value: [u8; 32], -) -> TreeInstruction { +) -> TreeInstruction { let key = StorageKey::new(AccountTreeId::new(address), H256(address_storage_key)); - TreeInstruction::Write(TreeEntry::new(key, leaf_index, H256(value))) + TreeInstruction::Write(TreeEntry::new( + key.hashed_key_u256(), + leaf_index, + H256(value), + )) } fn subtract_from_max_value(diff: u8) -> [u8; 32] { diff --git a/core/lib/object_store/src/objects.rs b/core/lib/object_store/src/objects.rs index d67e4e5df137..897c93e0b6f8 100644 --- a/core/lib/object_store/src/objects.rs +++ b/core/lib/object_store/src/objects.rs @@ -87,11 +87,15 @@ impl StoredObject for SnapshotFactoryDependencies { } } -impl StoredObject for SnapshotStorageLogsChunk { +impl StoredObject for SnapshotStorageLogsChunk +where + Self: ProtoFmt, +{ const BUCKET: Bucket = Bucket::StorageSnapshot; type Key<'a> = SnapshotStorageLogsStorageKey; fn encode_key(key: Self::Key<'_>) -> String { + // FIXME: should keys be separated by version? 
format!( "snapshot_l1_batch_{}_storage_logs_part_{:0>4}.proto.gzip", key.l1_batch_number, key.chunk_id @@ -181,7 +185,7 @@ mod tests { use zksync_types::{ snapshots::{SnapshotFactoryDependency, SnapshotStorageLog}, web3::Bytes, - AccountTreeId, StorageKey, H160, H256, + H256, }; use super::*; @@ -189,15 +193,15 @@ mod tests { #[test] fn test_storage_logs_filesnames_generate_corretly() { - let filename1 = SnapshotStorageLogsChunk::encode_key(SnapshotStorageLogsStorageKey { + let filename1 = ::encode_key(SnapshotStorageLogsStorageKey { l1_batch_number: L1BatchNumber(42), chunk_id: 97, }); - let filename2 = SnapshotStorageLogsChunk::encode_key(SnapshotStorageLogsStorageKey { + let filename2 = ::encode_key(SnapshotStorageLogsStorageKey { l1_batch_number: L1BatchNumber(3), chunk_id: 531, }); - let filename3 = SnapshotStorageLogsChunk::encode_key(SnapshotStorageLogsStorageKey { + let filename3 = ::encode_key(SnapshotStorageLogsStorageKey { l1_batch_number: L1BatchNumber(567), chunk_id: 5, }); @@ -225,13 +229,13 @@ mod tests { let storage_logs = SnapshotStorageLogsChunk { storage_logs: vec![ SnapshotStorageLog { - key: StorageKey::new(AccountTreeId::new(H160::random()), H256::random()), + key: H256::random(), value: H256::random(), l1_batch_number_of_initial_write: L1BatchNumber(123), enumeration_index: 234, }, SnapshotStorageLog { - key: StorageKey::new(AccountTreeId::new(H160::random()), H256::random()), + key: H256::random(), value: H256::random(), l1_batch_number_of_initial_write: L1BatchNumber(345), enumeration_index: 456, diff --git a/core/lib/protobuf_config/src/proto/config/experimental.proto b/core/lib/protobuf_config/src/proto/config/experimental.proto index 6f9ec426d8bb..1336c4719d26 100644 --- a/core/lib/protobuf_config/src/proto/config/experimental.proto +++ b/core/lib/protobuf_config/src/proto/config/experimental.proto @@ -16,4 +16,5 @@ message DB { // Experimental part of the Snapshot recovery configuration. 
message SnapshotRecovery { optional uint64 tree_recovery_parallel_persistence_buffer = 1; + optional bool drop_storage_key_preimages = 2; // optional; false by default } diff --git a/core/lib/protobuf_config/src/proto/config/snapshots_creator.proto b/core/lib/protobuf_config/src/proto/config/snapshots_creator.proto index 7aaa39a57f62..3846d86d6291 100644 --- a/core/lib/protobuf_config/src/proto/config/snapshots_creator.proto +++ b/core/lib/protobuf_config/src/proto/config/snapshots_creator.proto @@ -7,4 +7,6 @@ message SnapshotsCreator { optional uint64 storage_logs_chunk_size = 1; // optional optional uint32 concurrent_queries_count = 2; // optional optional config.object_store.ObjectStore object_store = 3; + optional uint32 version = 4; // optional; defaults to 0 + optional uint32 l1_batch_number = 5; // optional } diff --git a/core/lib/protobuf_config/src/snapshot_recovery.rs b/core/lib/protobuf_config/src/snapshot_recovery.rs index 4023cbb0c097..0c195abffe7a 100644 --- a/core/lib/protobuf_config/src/snapshot_recovery.rs +++ b/core/lib/protobuf_config/src/snapshot_recovery.rs @@ -60,6 +60,11 @@ impl ProtoRepr for proto::SnapshotRecovery { .unwrap_or_default(), l1_batch: self.l1_batch.map(L1BatchNumber), object_store: read_optional_repr(&self.object_store).context("object store")?, + drop_storage_key_preimages: self + .experimental + .as_ref() + .and_then(|experimental| experimental.drop_storage_key_preimages) + .unwrap_or_default(), }) } @@ -76,6 +81,7 @@ impl ProtoRepr for proto::SnapshotRecovery { .tree .parallel_persistence_buffer .map(|a| a.get() as u64), + drop_storage_key_preimages: Some(this.drop_storage_key_preimages), }), ) }; diff --git a/core/lib/protobuf_config/src/snapshots_creator.rs b/core/lib/protobuf_config/src/snapshots_creator.rs index b13d11915b10..d21fb2c321fd 100644 --- a/core/lib/protobuf_config/src/snapshots_creator.rs +++ b/core/lib/protobuf_config/src/snapshots_creator.rs @@ -1,4 +1,5 @@ use anyhow::Context as _; +use zksync_basic_types::L1BatchNumber; use zksync_config::configs; use zksync_protobuf::{repr::ProtoRepr, required}; @@ -13,6 +14,12 @@ impl ProtoRepr for proto::SnapshotsCreator { None }; Ok(Self::Type { + version: self + .version + .unwrap_or_default() + .try_into() + .context("version")?, + l1_batch_number: self.l1_batch_number.map(L1BatchNumber), storage_logs_chunk_size: *required(&self.storage_logs_chunk_size) .context("storage_logs_chunk_size")?, concurrent_queries_count: *required(&self.concurrent_queries_count) @@ -23,6 +30,8 @@ impl ProtoRepr for proto::SnapshotsCreator { fn build(this: &Self::Type) -> Self { Self { + version: Some(this.version.into()), + l1_batch_number: this.l1_batch_number.map(|num| num.0), storage_logs_chunk_size: Some(this.storage_logs_chunk_size), concurrent_queries_count: Some(this.concurrent_queries_count), object_store: this.object_store.as_ref().map(ProtoRepr::build), diff --git a/core/lib/snapshots_applier/src/lib.rs b/core/lib/snapshots_applier/src/lib.rs index ea1c11f40c2c..e160a2b96275 100644 --- a/core/lib/snapshots_applier/src/lib.rs +++ b/core/lib/snapshots_applier/src/lib.rs @@ -1,6 +1,6 @@ //! Logic for applying application-level snapshots to Postgres storage. 
-use std::{collections::HashMap, fmt, num::NonZeroUsize, sync::Arc, time::Duration}; +use std::{collections::HashMap, fmt, mem, num::NonZeroUsize, sync::Arc, time::Duration}; use anyhow::Context as _; use async_trait::async_trait; @@ -16,7 +16,7 @@ use zksync_types::{ SnapshotStorageLogsChunk, SnapshotStorageLogsStorageKey, SnapshotVersion, }, tokens::TokenInfo, - L1BatchNumber, L2BlockNumber, H256, + L1BatchNumber, L2BlockNumber, StorageKey, H256, }; use zksync_utils::bytecode::hash_bytecode; use zksync_web3_decl::{ @@ -237,6 +237,7 @@ pub struct SnapshotApplierTaskStats { #[derive(Debug)] pub struct SnapshotsApplierTask { snapshot_l1_batch: Option, + drop_storage_key_preimages: bool, config: SnapshotsApplierConfig, health_updater: HealthUpdater, connection_pool: ConnectionPool, @@ -253,6 +254,7 @@ impl SnapshotsApplierTask { ) -> Self { Self { snapshot_l1_batch: None, + drop_storage_key_preimages: false, config, health_updater: ReactiveHealthCheck::new("snapshot_recovery").1, connection_pool, @@ -266,6 +268,12 @@ impl SnapshotsApplierTask { self.snapshot_l1_batch = Some(number); } + /// Enables dropping storage key preimages when recovering storage logs from a snapshot with version 0. + /// This is a temporary flag that will eventually be removed together with version 0 snapshot support. + pub fn drop_storage_key_preimages(&mut self) { + self.drop_storage_key_preimages = true; + } + /// Returns the health check for snapshot recovery. pub fn health_check(&self) -> ReactiveHealthCheck { self.health_updater.subscribe() @@ -285,15 +293,7 @@ impl SnapshotsApplierTask { let mut backoff = self.config.initial_retry_backoff; let mut last_error = None; for retry_id in 0..self.config.retry_count { - let result = SnapshotsApplier::load_snapshot( - &self.connection_pool, - self.main_node_client.as_ref(), - self.blob_store.as_ref(), - &self.health_updater, - self.snapshot_l1_batch, - self.config.max_concurrency.get(), - ) - .await; + let result = SnapshotsApplier::load_snapshot(&self).await; match result { Ok((strategy, final_status)) => { @@ -334,9 +334,9 @@ impl SnapshotsApplierTask { #[derive(Debug, Clone, Copy)] enum SnapshotRecoveryStrategy { /// Snapshot recovery should proceed from scratch with the specified params. - New, + New(SnapshotVersion), /// Snapshot recovery should continue with the specified params. - Resumed, + Resumed(SnapshotVersion), /// Snapshot recovery has already been completed. Completed, } @@ -360,9 +360,20 @@ impl SnapshotRecoveryStrategy { return Ok((Self::Completed, applied_snapshot_status)); } + let l1_batch_number = applied_snapshot_status.l1_batch_number; + let snapshot_header = main_node_client + .fetch_snapshot(l1_batch_number) + .await? + .with_context(|| { + format!("snapshot for L1 batch #{l1_batch_number} is no longer present on main node") + })?; + // Old snapshots can theoretically be removed by the node, but in this case the snapshot data may be removed as well, + // so returning an error looks appropriate here. 
+ let snapshot_version = Self::check_snapshot_version(snapshot_header.version)?; + let latency = latency.observe(); tracing::info!("Re-initialized snapshots applier after reset/failure in {latency:?}"); - Ok((Self::Resumed, applied_snapshot_status)) + Ok((Self::Resumed(snapshot_version), applied_snapshot_status)) } else { let is_genesis_needed = storage.blocks_dal().is_genesis_needed().await?; if !is_genesis_needed { @@ -372,7 +383,7 @@ impl SnapshotRecoveryStrategy { return Err(SnapshotsApplierError::Fatal(err)); } - let recovery_status = + let (recovery_status, snapshot_version) = Self::create_fresh_recovery_status(main_node_client, snapshot_l1_batch).await?; let storage_logs_count = storage @@ -390,14 +401,14 @@ impl SnapshotRecoveryStrategy { let latency = latency.observe(); tracing::info!("Initialized fresh snapshots applier in {latency:?}"); - Ok((Self::New, recovery_status)) + Ok((Self::New(snapshot_version), recovery_status)) } } async fn create_fresh_recovery_status( main_node_client: &dyn SnapshotsApplierMainNodeClient, snapshot_l1_batch: Option, - ) -> Result { + ) -> Result<(SnapshotRecoveryStatus, SnapshotVersion), SnapshotsApplierError> { let l1_batch_number = match snapshot_l1_batch { Some(num) => num, None => main_node_client @@ -417,7 +428,7 @@ impl SnapshotRecoveryStrategy { version = snapshot.version, chunk_count = snapshot.storage_logs_chunks.len() ); - Self::check_snapshot_version(snapshot.version)?; + let snapshot_version = Self::check_snapshot_version(snapshot.version)?; let l1_batch = main_node_client .fetch_l1_batch_details(l1_batch_number) @@ -445,7 +456,7 @@ impl SnapshotRecoveryStrategy { return Err(err.into()); } - Ok(SnapshotRecoveryStatus { + let status = SnapshotRecoveryStatus { l1_batch_number, l1_batch_timestamp: l1_batch.base.timestamp, l1_batch_root_hash, @@ -454,22 +465,105 @@ impl SnapshotRecoveryStrategy { l2_block_hash, protocol_version, storage_logs_chunks_processed: vec![false; snapshot.storage_logs_chunks.len()], - }) + }; + Ok((status, snapshot_version)) } - fn check_snapshot_version(raw_version: u16) -> anyhow::Result<()> { + fn check_snapshot_version(raw_version: u16) -> anyhow::Result { let version = SnapshotVersion::try_from(raw_version).with_context(|| { format!( "Unrecognized snapshot version: {raw_version}; make sure you're running the latest version of the node" ) })?; anyhow::ensure!( - matches!(version, SnapshotVersion::Version0), - "Cannot recover from a snapshot with version {version:?}; the only supported version is {:?}", - SnapshotVersion::Version0 + matches!(version, SnapshotVersion::Version0 | SnapshotVersion::Version1), + "Cannot recover from a snapshot with version {version:?}; the only supported versions are {:?}", + [SnapshotVersion::Version0, SnapshotVersion::Version1] ); + Ok(version) + } +} + +/// Versioned storage logs chunk. +#[derive(Debug)] +enum StorageLogs { + V0(Vec>), + V1(Vec), +} + +impl StorageLogs { + async fn load( + blob_store: &dyn ObjectStore, + key: SnapshotStorageLogsStorageKey, + version: SnapshotVersion, + ) -> Result { + match version { + SnapshotVersion::Version0 => { + let logs: SnapshotStorageLogsChunk = blob_store.get(key).await?; + Ok(Self::V0(logs.storage_logs)) + } + SnapshotVersion::Version1 => { + let logs: SnapshotStorageLogsChunk = blob_store.get(key).await?; + Ok(Self::V1(logs.storage_logs)) + } + } + } + + fn len(&self) -> usize { + match self { + Self::V0(logs) => logs.len(), + Self::V1(logs) => logs.len(), + } + } + + /// Performs basic sanity check for a storage logs chunk. 
+ fn validate(&self, snapshot_status: &SnapshotRecoveryStatus) -> anyhow::Result<()> { + match self { + Self::V0(logs) => Self::validate_inner(logs, snapshot_status), + Self::V1(logs) => Self::validate_inner(logs, snapshot_status), + } + } + + fn validate_inner( + storage_logs: &[SnapshotStorageLog], + snapshot_status: &SnapshotRecoveryStatus, + ) -> anyhow::Result<()> { + for log in storage_logs { + anyhow::ensure!( + log.enumeration_index > 0, + "invalid storage log with zero enumeration_index: {log:?}" + ); + anyhow::ensure!( + log.l1_batch_number_of_initial_write <= snapshot_status.l1_batch_number, + "invalid storage log with `l1_batch_number_of_initial_write` from the future: {log:?}" + ); + } Ok(()) } + + fn drop_key_preimages(&mut self) { + match self { + Self::V0(logs) => { + *self = Self::V1( + mem::take(logs) + .into_iter() + .map(SnapshotStorageLog::drop_key_preimage) + .collect(), + ); + } + Self::V1(_) => { /* do nothing */ } + } + } + + fn without_preimages(self) -> Vec { + match self { + Self::V0(logs) => logs + .into_iter() + .map(SnapshotStorageLog::drop_key_preimage) + .collect(), + Self::V1(logs) => logs, + } + } } /// Applying application-level storage snapshots to the Postgres storage. @@ -480,7 +574,9 @@ struct SnapshotsApplier<'a> { blob_store: &'a dyn ObjectStore, applied_snapshot_status: SnapshotRecoveryStatus, health_updater: &'a HealthUpdater, + snapshot_version: SnapshotVersion, max_concurrency: usize, + drop_storage_key_preimages: bool, factory_deps_recovered: bool, tokens_recovered: bool, } @@ -488,13 +584,12 @@ struct SnapshotsApplier<'a> { impl<'a> SnapshotsApplier<'a> { /// Returns final snapshot recovery status. async fn load_snapshot( - connection_pool: &'a ConnectionPool, - main_node_client: &'a dyn SnapshotsApplierMainNodeClient, - blob_store: &'a dyn ObjectStore, - health_updater: &'a HealthUpdater, - snapshot_l1_batch: Option, - max_concurrency: usize, + task: &'a SnapshotsApplierTask, ) -> Result<(SnapshotRecoveryStrategy, SnapshotRecoveryStatus), SnapshotsApplierError> { + let health_updater = &task.health_updater; + let connection_pool = &task.connection_pool; + let main_node_client = task.main_node_client.as_ref(); + // While the recovery is in progress, the node is healthy (no error has occurred), // but is affected (its usual APIs don't work). 
health_updater.update(HealthStatus::Affected.into()); @@ -507,23 +602,25 @@ impl<'a> SnapshotsApplier<'a> { let (strategy, applied_snapshot_status) = SnapshotRecoveryStrategy::new( &mut storage_transaction, main_node_client, - snapshot_l1_batch, + task.snapshot_l1_batch, ) .await?; tracing::info!("Chosen snapshot recovery strategy: {strategy:?} with status: {applied_snapshot_status:?}"); - let created_from_scratch = match strategy { + let (created_from_scratch, snapshot_version) = match strategy { SnapshotRecoveryStrategy::Completed => return Ok((strategy, applied_snapshot_status)), - SnapshotRecoveryStrategy::New => true, - SnapshotRecoveryStrategy::Resumed => false, + SnapshotRecoveryStrategy::New(version) => (true, version), + SnapshotRecoveryStrategy::Resumed(version) => (false, version), }; let mut this = Self { connection_pool, main_node_client, - blob_store, + blob_store: task.blob_store.as_ref(), applied_snapshot_status, health_updater, - max_concurrency, + snapshot_version, + max_concurrency: task.config.max_concurrency.get(), + drop_storage_key_preimages: task.drop_storage_key_preimages, factory_deps_recovered: !created_from_scratch, tokens_recovered: false, }; @@ -658,16 +755,30 @@ impl<'a> SnapshotsApplier<'a> { async fn insert_storage_logs_chunk( &self, - storage_logs: &[SnapshotStorageLog], + storage_logs: &StorageLogs, storage: &mut Connection<'_, Core>, ) -> Result<(), SnapshotsApplierError> { - storage - .storage_logs_dal() - .insert_storage_logs_from_snapshot( - self.applied_snapshot_status.l2_block_number, - storage_logs, - ) - .await?; + match storage_logs { + StorageLogs::V0(logs) => { + #[allow(deprecated)] + storage + .storage_logs_dal() + .insert_storage_logs_with_preimages_from_snapshot( + self.applied_snapshot_status.l2_block_number, + logs, + ) + .await?; + } + StorageLogs::V1(logs) => { + storage + .storage_logs_dal() + .insert_storage_logs_from_snapshot( + self.applied_snapshot_status.l2_block_number, + logs, + ) + .await?; + } + } Ok(()) } @@ -688,14 +799,19 @@ impl<'a> SnapshotsApplier<'a> { chunk_id, l1_batch_number: self.applied_snapshot_status.l1_batch_number, }; - let storage_snapshot_chunk: SnapshotStorageLogsChunk = - self.blob_store.get(storage_key).await.map_err(|err| { - let context = - format!("cannot fetch storage logs {storage_key:?} from object store"); - SnapshotsApplierError::object_store(err, context) - })?; - let storage_logs = &storage_snapshot_chunk.storage_logs; - self.validate_storage_logs_chunk(storage_logs)?; + let mut storage_logs = + StorageLogs::load(self.blob_store, storage_key, self.snapshot_version) + .await + .map_err(|err| { + let context = + format!("cannot fetch storage logs {storage_key:?} from object store"); + SnapshotsApplierError::object_store(err, context) + })?; + + storage_logs.validate(&self.applied_snapshot_status)?; + if self.drop_storage_key_preimages { + storage_logs.drop_key_preimages(); + } let latency = latency.observe(); tracing::info!( "Loaded {} storage logs from GCS for chunk {chunk_id} in {latency:?}", @@ -712,9 +828,11 @@ impl<'a> SnapshotsApplier<'a> { let mut storage_transaction = storage.start_transaction().await?; tracing::info!("Loading {} storage logs into Postgres", storage_logs.len()); - self.insert_storage_logs_chunk(storage_logs, &mut storage_transaction) + + self.insert_storage_logs_chunk(&storage_logs, &mut storage_transaction) .await?; - self.insert_initial_writes_chunk(storage_logs, &mut storage_transaction) + let storage_logs = storage_logs.without_preimages(); + 
self.insert_initial_writes_chunk(&storage_logs, &mut storage_transaction) .await?; storage_transaction @@ -730,24 +848,6 @@ impl<'a> SnapshotsApplier<'a> { Ok(()) } - /// Performs basic sanity check for a storage logs chunk. - fn validate_storage_logs_chunk( - &self, - storage_logs: &[SnapshotStorageLog], - ) -> anyhow::Result<()> { - for log in storage_logs { - anyhow::ensure!( - log.enumeration_index > 0, - "invalid storage log with zero enumeration_index: {log:?}" - ); - anyhow::ensure!( - log.l1_batch_number_of_initial_write <= self.applied_snapshot_status.l1_batch_number, - "invalid storage log with `l1_batch_number_of_initial_write` from the future: {log:?}" - ); - } - Ok(()) - } - async fn recover_storage_logs(&self) -> Result<(), SnapshotsApplierError> { let effective_concurrency = (self.connection_pool.max_size() as usize).min(self.max_concurrency); diff --git a/core/lib/snapshots_applier/src/tests/mod.rs b/core/lib/snapshots_applier/src/tests/mod.rs index b15f8bc657bf..2f78bdc274d6 100644 --- a/core/lib/snapshots_applier/src/tests/mod.rs +++ b/core/lib/snapshots_applier/src/tests/mod.rs @@ -39,10 +39,8 @@ async fn snapshots_creator_can_successfully_recover_db( let expected_status = mock_recovery_status(); let storage_logs = random_storage_logs(expected_status.l1_batch_number, 200); let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; - let storage_logs_by_hashed_key: HashMap<_, _> = storage_logs - .into_iter() - .map(|log| (log.key.hashed_key(), log)) - .collect(); + let storage_logs_by_hashed_key: HashMap<_, _> = + storage_logs.into_iter().map(|log| (log.key, log)).collect(); let object_store_with_errors; let object_store = if with_object_store_errors { @@ -103,8 +101,9 @@ async fn snapshots_creator_can_successfully_recover_db( assert_eq!(all_storage_logs.len(), storage_logs_by_hashed_key.len()); for db_log in all_storage_logs { let expected_log = &storage_logs_by_hashed_key[&db_log.hashed_key]; - assert_eq!(db_log.address, *expected_log.key.address()); - assert_eq!(db_log.key, *expected_log.key.key()); + assert_eq!(db_log.hashed_key, expected_log.key); + assert!(db_log.key.is_none()); + assert!(db_log.address.is_none()); assert_eq!(db_log.value, expected_log.value); assert_eq!(db_log.l2_block_number, expected_status.l2_block_number); } @@ -143,11 +142,58 @@ async fn snapshots_creator_can_successfully_recover_db( assert!(!stats.done_work); } +#[test_casing(2, [false, true])] +#[tokio::test] +async fn applier_recovers_v0_snapshot(drop_storage_key_preimages: bool) { + let pool = ConnectionPool::::test_pool().await; + let expected_status = mock_recovery_status(); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 200); + let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; + + let mut task = SnapshotsApplierTask::new( + SnapshotsApplierConfig::for_tests(), + pool.clone(), + Box::new(client), + object_store, + ); + if drop_storage_key_preimages { + task.drop_storage_key_preimages(); + } + let stats = task.run().await.unwrap(); + assert!(stats.done_work); + + let mut storage = pool.connection().await.unwrap(); + let all_storage_logs = storage + .storage_logs_dal() + .dump_all_storage_logs_for_tests() + .await; + assert_eq!(all_storage_logs.len(), storage_logs.len()); + + let storage_logs_by_hashed_key: HashMap<_, _> = storage_logs + .into_iter() + .map(|log| (log.key.hashed_key(), log)) + .collect(); + for db_log in all_storage_logs { + let expected_log = 
&storage_logs_by_hashed_key[&db_log.hashed_key]; + assert_eq!(db_log.hashed_key, expected_log.key.hashed_key()); + assert_eq!(db_log.value, expected_log.value); + assert_eq!(db_log.l2_block_number, expected_status.l2_block_number); + + if drop_storage_key_preimages { + assert!(db_log.key.is_none()); + assert!(db_log.address.is_none()); + } else { + assert_eq!(db_log.key, Some(*expected_log.key.key())); + assert_eq!(db_log.address, Some(*expected_log.key.address())); + } + } +} + #[tokio::test] async fn applier_recovers_explicitly_specified_snapshot() { let pool = ConnectionPool::::test_pool().await; let expected_status = mock_recovery_status(); - let storage_logs = random_storage_logs(expected_status.l1_batch_number, 200); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 200); let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; let mut task = SnapshotsApplierTask::new( @@ -172,7 +218,7 @@ async fn applier_recovers_explicitly_specified_snapshot() { async fn applier_error_for_missing_explicitly_specified_snapshot() { let pool = ConnectionPool::::test_pool().await; let expected_status = mock_recovery_status(); - let storage_logs = random_storage_logs(expected_status.l1_batch_number, 200); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 200); let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; let mut task = SnapshotsApplierTask::new( @@ -195,7 +241,7 @@ async fn snapshot_applier_recovers_after_stopping() { let pool = ConnectionPool::::test_pool().await; let mut expected_status = mock_recovery_status(); expected_status.storage_logs_chunks_processed = vec![true; 10]; - let storage_logs = random_storage_logs(expected_status.l1_batch_number, 200); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 200); let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; let (stopping_object_store, mut stop_receiver) = HangingObjectStore::new(object_store.clone(), 1); @@ -402,10 +448,7 @@ async fn applier_errors_with_unrecognized_snapshot_version() { let object_store = MockObjectStore::arc(); let expected_status = mock_recovery_status(); let client = MockMainNodeClient { - fetch_newest_snapshot_response: Some(SnapshotHeader { - version: u16::MAX, - ..mock_snapshot_header(&expected_status) - }), + fetch_newest_snapshot_response: Some(mock_snapshot_header(u16::MAX, &expected_status)), ..MockMainNodeClient::default() }; @@ -422,7 +465,7 @@ async fn applier_errors_with_unrecognized_snapshot_version() { async fn applier_returns_error_on_fatal_object_store_error() { let pool = ConnectionPool::::test_pool().await; let expected_status = mock_recovery_status(); - let storage_logs = random_storage_logs(expected_status.l1_batch_number, 100); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 100); let (object_store, client) = prepare_clients(&expected_status, &storage_logs).await; let object_store = ObjectStoreWithErrors::new(object_store, |_| { Err(ObjectStoreError::KeyNotFound("not found".into())) @@ -447,7 +490,7 @@ async fn applier_returns_error_on_fatal_object_store_error() { async fn applier_returns_error_after_too_many_object_store_retries() { let pool = ConnectionPool::::test_pool().await; let expected_status = mock_recovery_status(); - let storage_logs = random_storage_logs(expected_status.l1_batch_number, 100); + let storage_logs = random_storage_logs::(expected_status.l1_batch_number, 100); let (object_store, 
client) = prepare_clients(&expected_status, &storage_logs).await; let object_store = ObjectStoreWithErrors::new(object_store, |_| { Err(ObjectStoreError::Other { @@ -482,7 +525,7 @@ async fn recovering_tokens() { continue; } storage_logs.push(SnapshotStorageLog { - key: get_code_key(&token.l2_address), + key: get_code_key(&token.l2_address).hashed_key(), value: H256::random(), l1_batch_number_of_initial_write: expected_status.l1_batch_number, enumeration_index: storage_logs.len() as u64 + 1, diff --git a/core/lib/snapshots_applier/src/tests/utils.rs b/core/lib/snapshots_applier/src/tests/utils.rs index e683e0cae00f..3374e62452d8 100644 --- a/core/lib/snapshots_applier/src/tests/utils.rs +++ b/core/lib/snapshots_applier/src/tests/utils.rs @@ -4,7 +4,7 @@ use std::{collections::HashMap, fmt, future, sync::Arc}; use async_trait::async_trait; use tokio::sync::watch; -use zksync_object_store::{Bucket, MockObjectStore, ObjectStore, ObjectStoreError}; +use zksync_object_store::{Bucket, MockObjectStore, ObjectStore, ObjectStoreError, StoredObject}; use zksync_types::{ api, block::L2BlockHeader, @@ -16,12 +16,34 @@ use zksync_types::{ tokens::{TokenInfo, TokenMetadata}, web3::Bytes, AccountTreeId, Address, L1BatchNumber, L2BlockNumber, ProtocolVersionId, StorageKey, - StorageValue, H160, H256, + StorageValue, H256, }; use zksync_web3_decl::error::EnrichedClientResult; use crate::SnapshotsApplierMainNodeClient; +pub(super) trait SnapshotLogKey: Clone { + const VERSION: SnapshotVersion; + + fn random() -> Self; +} + +impl SnapshotLogKey for H256 { + const VERSION: SnapshotVersion = SnapshotVersion::Version1; + + fn random() -> Self { + Self::random() + } +} + +impl SnapshotLogKey for StorageKey { + const VERSION: SnapshotVersion = SnapshotVersion::Version0; + + fn random() -> Self { + Self::new(AccountTreeId::new(Address::random()), H256::random()) + } +} + #[derive(Debug, Clone, Default)] pub(super) struct MockMainNodeClient { pub fetch_l1_batch_responses: HashMap, @@ -182,16 +204,13 @@ fn l1_batch_details(number: L1BatchNumber, root_hash: H256) -> api::L1BatchDetai } } -pub(super) fn random_storage_logs( +pub(super) fn random_storage_logs( l1_batch_number: L1BatchNumber, count: u64, -) -> Vec { +) -> Vec> { (0..count) .map(|i| SnapshotStorageLog { - key: StorageKey::new( - AccountTreeId::from_fixed_bytes(H160::random().to_fixed_bytes()), - H256::random(), - ), + key: K::random(), value: StorageValue::random(), l1_batch_number_of_initial_write: l1_batch_number, enumeration_index: i + 1, @@ -235,9 +254,12 @@ pub(super) fn mock_tokens() -> Vec { ] } -pub(super) fn mock_snapshot_header(status: &SnapshotRecoveryStatus) -> SnapshotHeader { +pub(super) fn mock_snapshot_header( + version: u16, + status: &SnapshotRecoveryStatus, +) -> SnapshotHeader { SnapshotHeader { - version: SnapshotVersion::Version0.into(), + version, l1_batch_number: status.l1_batch_number, l2_block_number: status.l2_block_number, storage_logs_chunks: (0..status.storage_logs_chunks_processed.len() as u64) @@ -250,10 +272,14 @@ pub(super) fn mock_snapshot_header(status: &SnapshotRecoveryStatus) -> SnapshotH } } -pub(super) async fn prepare_clients( +pub(super) async fn prepare_clients( status: &SnapshotRecoveryStatus, - logs: &[SnapshotStorageLog], -) -> (Arc, MockMainNodeClient) { + logs: &[SnapshotStorageLog], +) -> (Arc, MockMainNodeClient) +where + K: SnapshotLogKey, + for<'a> SnapshotStorageLogsChunk: StoredObject = SnapshotStorageLogsStorageKey>, +{ let object_store = MockObjectStore::arc(); let mut client = 
MockMainNodeClient::default(); let factory_dep_bytes: Vec = (0..32).collect(); @@ -286,7 +312,7 @@ pub(super) async fn prepare_clients( .unwrap(); } - client.fetch_newest_snapshot_response = Some(mock_snapshot_header(status)); + client.fetch_newest_snapshot_response = Some(mock_snapshot_header(K::VERSION.into(), status)); client.fetch_l1_batch_responses.insert( status.l1_batch_number, l1_batch_details(status.l1_batch_number, status.l1_batch_root_hash), diff --git a/core/lib/state/src/postgres/mod.rs b/core/lib/state/src/postgres/mod.rs index 17163af0d56f..5bcdfc34cb03 100644 --- a/core/lib/state/src/postgres/mod.rs +++ b/core/lib/state/src/postgres/mod.rs @@ -42,12 +42,12 @@ impl CacheValue for TimestampedFactoryDep { } /// Type alias for initial writes caches. -type InitialWritesCache = LruCache; +type InitialWritesCache = LruCache; -impl CacheValue for L1BatchNumber { +impl CacheValue for L1BatchNumber { #[allow(clippy::cast_possible_truncation)] // doesn't happen in practice fn cache_weight(&self) -> u32 { - const WEIGHT: usize = mem::size_of::() + mem::size_of::(); + const WEIGHT: usize = mem::size_of::() + mem::size_of::(); // ^ Since values are small, we want to account for key sizes as well WEIGHT as u32 @@ -122,7 +122,7 @@ impl ValuesCache { /// Gets the cached value for `key` provided that the cache currently holds values /// for `l2_block_number`. - fn get(&self, l2_block_number: L2BlockNumber, key: &StorageKey) -> Option { + fn get(&self, l2_block_number: L2BlockNumber, hashed_key: H256) -> Option { let lock = self.0.read().expect("values cache is poisoned"); if lock.valid_for < l2_block_number { // The request is from the future; we cannot say which values in the cache remain valid, @@ -130,7 +130,7 @@ impl ValuesCache { return None; } - let timestamped_value = lock.values.get(&key.hashed_key())?; + let timestamped_value = lock.values.get(&hashed_key)?; if timestamped_value.loaded_at <= l2_block_number { Some(timestamped_value.value) } else { @@ -139,11 +139,11 @@ impl ValuesCache { } /// Caches `value` for `key`, but only if the cache currently holds values for `l2_block_number`. 
- fn insert(&self, l2_block_number: L2BlockNumber, key: StorageKey, value: StorageValue) { + fn insert(&self, l2_block_number: L2BlockNumber, hashed_key: H256, value: StorageValue) { let lock = self.0.read().expect("values cache is poisoned"); if lock.valid_for == l2_block_number { lock.values.insert( - key.hashed_key(), + hashed_key, TimestampedStorageValue { value, loaded_at: l2_block_number, @@ -481,19 +481,21 @@ impl<'a> PostgresStorage<'a> { } impl ReadStorage for PostgresStorage<'_> { - fn read_value(&mut self, &key: &StorageKey) -> StorageValue { + fn read_value(&mut self, key: &StorageKey) -> StorageValue { + let hashed_key = key.hashed_key(); let latency = STORAGE_METRICS.storage[&Method::ReadValue].start(); let values_cache = self.values_cache(); - let cached_value = values_cache.and_then(|cache| cache.get(self.l2_block_number, &key)); + let cached_value = + values_cache.and_then(|cache| cache.get(self.l2_block_number, hashed_key)); let value = cached_value.unwrap_or_else(|| { let mut dal = self.connection.storage_web3_dal(); let value = self .rt_handle - .block_on(dal.get_historical_value_unchecked(&key, self.l2_block_number)) + .block_on(dal.get_historical_value_unchecked(hashed_key, self.l2_block_number)) .expect("Failed executing `read_value`"); if let Some(cache) = self.values_cache() { - cache.insert(self.l2_block_number, key, value); + cache.insert(self.l2_block_number, hashed_key, value); } value }); @@ -503,13 +505,15 @@ impl ReadStorage for PostgresStorage<'_> { } fn is_write_initial(&mut self, key: &StorageKey) -> bool { + let hashed_key = key.hashed_key(); let latency = STORAGE_METRICS.storage[&Method::IsWriteInitial].start(); let caches = self.caches.as_ref(); - let cached_value = caches.and_then(|caches| caches.initial_writes.get(key)); + let cached_value = caches.and_then(|caches| caches.initial_writes.get(&hashed_key)); if cached_value.is_none() { // Write is absent in positive cache, check whether it's present in the negative cache. - let cached_value = caches.and_then(|caches| caches.negative_initial_writes.get(key)); + let cached_value = + caches.and_then(|caches| caches.negative_initial_writes.get(&hashed_key)); if let Some(min_l1_batch_for_initial_write) = cached_value { // We know that this slot was certainly not touched before `min_l1_batch_for_initial_write`. // Try to use this knowledge to decide if the change is certainly initial. @@ -526,17 +530,17 @@ impl ReadStorage for PostgresStorage<'_> { let mut dal = self.connection.storage_web3_dal(); let value = self .rt_handle - .block_on(dal.get_l1_batch_number_for_initial_write(key)) + .block_on(dal.get_l1_batch_number_for_initial_write(hashed_key)) .expect("Failed executing `is_write_initial`"); if let Some(caches) = &self.caches { if let Some(l1_batch_number) = value { - caches.negative_initial_writes.remove(key); - caches.initial_writes.insert(*key, l1_batch_number); + caches.negative_initial_writes.remove(&hashed_key); + caches.initial_writes.insert(hashed_key, l1_batch_number); } else { caches .negative_initial_writes - .insert(*key, self.pending_l1_batch_number); + .insert(hashed_key, self.pending_l1_batch_number); // The pending L1 batch might have been sealed since its number was requested from Postgres // in `Self::new()`, so this is a somewhat conservative estimate. 
} @@ -589,13 +593,11 @@ impl ReadStorage for PostgresStorage<'_> { } fn get_enumeration_index(&mut self, key: &StorageKey) -> Option { + let hashed_key = key.hashed_key(); let mut dal = self.connection.storage_logs_dedup_dal(); - let value = self - .rt_handle - .block_on(dal.get_enumeration_index_in_l1_batch( - key.hashed_key(), - self.l1_batch_number_for_l2_block, - )); + let value = self.rt_handle.block_on( + dal.get_enumeration_index_in_l1_batch(hashed_key, self.l1_batch_number_for_l2_block), + ); value.expect("failed getting enumeration index for key") } } diff --git a/core/lib/state/src/postgres/tests.rs b/core/lib/state/src/postgres/tests.rs index 4ab8ebb12a7b..f88055fa0479 100644 --- a/core/lib/state/src/postgres/tests.rs +++ b/core/lib/state/src/postgres/tests.rs @@ -318,11 +318,15 @@ fn test_initial_writes_cache(pool: &ConnectionPool, rt_handle: Handle) { assert!(storage.is_write_initial(&logs[0].key)); assert!(storage.is_write_initial(&non_existing_key)); assert_eq!( - caches.negative_initial_writes.get(&logs[0].key), + caches + .negative_initial_writes + .get(&logs[0].key.hashed_key()), Some(L1BatchNumber(0)) ); assert_eq!( - caches.negative_initial_writes.get(&non_existing_key), + caches + .negative_initial_writes + .get(&non_existing_key.hashed_key()), Some(L1BatchNumber(0)) ); assert!(storage.is_write_initial(&logs[0].key)); @@ -353,12 +357,19 @@ fn test_initial_writes_cache(pool: &ConnectionPool, rt_handle: Handle) { // Check that the cache entries have been updated assert_eq!( - caches.initial_writes.get(&logs[0].key), + caches.initial_writes.get(&logs[0].key.hashed_key()), Some(L1BatchNumber(1)) ); - assert_eq!(caches.negative_initial_writes.get(&logs[0].key), None); assert_eq!( - caches.negative_initial_writes.get(&non_existing_key), + caches + .negative_initial_writes + .get(&logs[0].key.hashed_key()), + None + ); + assert_eq!( + caches + .negative_initial_writes + .get(&non_existing_key.hashed_key()), Some(L1BatchNumber(2)) ); assert!(storage.is_write_initial(&logs[0].key)); @@ -376,11 +387,13 @@ fn test_initial_writes_cache(pool: &ConnectionPool, rt_handle: Handle) { // Check that the cache entries are still as expected. 
assert_eq!( - caches.initial_writes.get(&logs[0].key), + caches.initial_writes.get(&logs[0].key.hashed_key()), Some(L1BatchNumber(1)) ); assert_eq!( - caches.negative_initial_writes.get(&non_existing_key), + caches + .negative_initial_writes + .get(&non_existing_key.hashed_key()), Some(L1BatchNumber(2)) ); @@ -415,7 +428,10 @@ struct ValueCacheAssertions<'a> { impl ValueCacheAssertions<'_> { fn assert_entries(&self, expected_entries: &[(StorageKey, Option)]) { for (key, expected_value) in expected_entries { - assert_eq!(self.cache.get(self.l2_block_number, key), *expected_value); + assert_eq!( + self.cache.get(self.l2_block_number, key.hashed_key()), + *expected_value + ); } } } diff --git a/core/lib/state/src/rocksdb/mod.rs b/core/lib/state/src/rocksdb/mod.rs index bda416cb433b..aab33c7dfe83 100644 --- a/core/lib/state/src/rocksdb/mod.rs +++ b/core/lib/state/src/rocksdb/mod.rs @@ -400,7 +400,7 @@ impl RocksdbStorage { async fn apply_storage_logs( &mut self, - storage_logs: HashMap, + storage_logs: HashMap, storage: &mut Connection<'_, Core>, ) -> anyhow::Result<()> { let db = self.db.clone(); @@ -409,12 +409,13 @@ impl RocksdbStorage { .await .context("panicked processing storage logs")?; - let (logs_with_known_indices, logs_with_unknown_indices): (Vec<_>, Vec<_>) = processed_logs - .into_iter() - .partition_map(|(key, StateValue { value, enum_index })| match enum_index { - Some(index) => Either::Left((key.hashed_key(), (value, index))), - None => Either::Right((key.hashed_key(), value)), - }); + let (logs_with_known_indices, logs_with_unknown_indices): (Vec<_>, Vec<_>) = + processed_logs.into_iter().partition_map( + |(hashed_key, StateValue { value, enum_index })| match enum_index { + Some(index) => Either::Left((hashed_key, (value, index))), + None => Either::Right((hashed_key, value)), + }, + ); let keys_with_unknown_indices: Vec<_> = logs_with_unknown_indices .iter() .map(|&(key, _)| key) @@ -440,8 +441,8 @@ impl RocksdbStorage { Ok(()) } - fn read_value_inner(&self, key: &StorageKey) -> Option { - Self::read_state_value(&self.db, key.hashed_key()).map(|state_value| state_value.value) + fn read_value_inner(&self, hashed_key: H256) -> Option { + Self::read_state_value(&self.db, hashed_key).map(|state_value| state_value.value) } fn read_state_value( @@ -457,15 +458,20 @@ impl RocksdbStorage { /// Returns storage logs to apply. 
fn process_transaction_logs( db: &RocksDB, - updates: HashMap, - ) -> Vec<(StorageKey, StateValue)> { - let it = updates.into_iter().filter_map(move |(key, new_value)| { - if let Some(state_value) = Self::read_state_value(db, key.hashed_key()) { - Some((key, StateValue::new(new_value, state_value.enum_index))) - } else { - (!new_value.is_zero()).then_some((key, StateValue::new(new_value, None))) - } - }); + updates: HashMap, + ) -> Vec<(H256, StateValue)> { + let it = updates + .into_iter() + .filter_map(move |(hashed_key, new_value)| { + if let Some(state_value) = Self::read_state_value(db, hashed_key) { + Some(( + hashed_key, + StateValue::new(new_value, state_value.enum_index), + )) + } else { + (!new_value.is_zero()).then_some((hashed_key, StateValue::new(new_value, None))) + } + }); it.collect() } @@ -617,11 +623,12 @@ impl RocksdbStorage { impl ReadStorage for RocksdbStorage { fn read_value(&mut self, key: &StorageKey) -> StorageValue { - self.read_value_inner(key).unwrap_or_else(H256::zero) + self.read_value_inner(key.hashed_key()) + .unwrap_or_else(H256::zero) } fn is_write_initial(&mut self, key: &StorageKey) -> bool { - self.read_value_inner(key).is_none() + self.read_value_inner(key.hashed_key()).is_none() } fn load_factory_dep(&mut self, hash: H256) -> Option> { diff --git a/core/lib/state/src/rocksdb/tests.rs b/core/lib/state/src/rocksdb/tests.rs index a006fcba4750..e73590015079 100644 --- a/core/lib/state/src/rocksdb/tests.rs +++ b/core/lib/state/src/rocksdb/tests.rs @@ -40,6 +40,12 @@ impl Default for RocksdbStorageEventListener { } } +fn hash_storage_log_keys(logs: &HashMap) -> HashMap { + logs.iter() + .map(|(key, value)| (key.hashed_key(), *value)) + .collect() +} + #[tokio::test] async fn rocksdb_storage_basics() { let dir = TempDir::new().expect("cannot create temporary dir for state keeper"); @@ -50,10 +56,11 @@ async fn rocksdb_storage_basics() { .into_iter() .map(|log| (log.key, log.value)) .collect(); - let changed_keys = RocksdbStorage::process_transaction_logs(&storage.db, storage_logs.clone()); + let changed_keys = + RocksdbStorage::process_transaction_logs(&storage.db, hash_storage_log_keys(&storage_logs)); storage.pending_patch.state = changed_keys .into_iter() - .map(|(key, state_value)| (key.hashed_key(), (state_value.value, 1))) // enum index doesn't matter in the test + .map(|(key, state_value)| (key, (state_value.value, 1))) // enum index doesn't matter in the test .collect(); storage.save(Some(L1BatchNumber(0))).await.unwrap(); { @@ -64,13 +71,14 @@ async fn rocksdb_storage_basics() { } // Overwrite some of the logs. 
- for log in storage_logs.values_mut().step_by(2) { - *log = StorageValue::zero(); + for log_value in storage_logs.values_mut().step_by(2) { + *log_value = StorageValue::zero(); } - let changed_keys = RocksdbStorage::process_transaction_logs(&storage.db, storage_logs.clone()); + let changed_keys = + RocksdbStorage::process_transaction_logs(&storage.db, hash_storage_log_keys(&storage_logs)); storage.pending_patch.state = changed_keys .into_iter() - .map(|(key, state_value)| (key.hashed_key(), (state_value.value, 1))) // enum index doesn't matter in the test + .map(|(key, state_value)| (key, (state_value.value, 1))) // enum index doesn't matter in the test .collect(); storage.save(Some(L1BatchNumber(1))).await.unwrap(); diff --git a/core/lib/state/src/shadow_storage.rs b/core/lib/state/src/shadow_storage.rs index 9ef1aacca155..5e32f9b25e71 100644 --- a/core/lib/state/src/shadow_storage.rs +++ b/core/lib/state/src/shadow_storage.rs @@ -50,9 +50,9 @@ impl<'a> ShadowStorage<'a> { } impl ReadStorage for ShadowStorage<'_> { - fn read_value(&mut self, &key: &StorageKey) -> StorageValue { - let source_value = self.source_storage.read_value(&key); - let expected_value = self.to_check_storage.read_value(&key); + fn read_value(&mut self, key: &StorageKey) -> StorageValue { + let source_value = self.source_storage.as_mut().read_value(key); + let expected_value = self.to_check_storage.as_mut().read_value(key); if source_value != expected_value { self.metrics.read_value_mismatch.inc(); tracing::error!( @@ -65,8 +65,8 @@ impl ReadStorage for ShadowStorage<'_> { } fn is_write_initial(&mut self, key: &StorageKey) -> bool { - let source_value = self.source_storage.is_write_initial(key); - let expected_value = self.to_check_storage.is_write_initial(key); + let source_value = self.source_storage.as_mut().is_write_initial(key); + let expected_value = self.to_check_storage.as_mut().is_write_initial(key); if source_value != expected_value { self.metrics.is_write_initial_mismatch.inc(); tracing::error!( @@ -93,18 +93,16 @@ impl ReadStorage for ShadowStorage<'_> { } fn get_enumeration_index(&mut self, key: &StorageKey) -> Option { - let source_value = self.source_storage.get_enumeration_index(key); - let expected_value = self.to_check_storage.get_enumeration_index(key); + let source_value = self.source_storage.as_mut().get_enumeration_index(key); + let expected_value = self.to_check_storage.as_mut().get_enumeration_index(key); if source_value != expected_value { tracing::error!( - "get_enumeration_index({:?}) -- l1_batch_number={:?} -- expected source={:?} to be equal to \ - to_check={:?}", key, self.l1_batch_number, source_value, expected_value + "get_enumeration_index({key:?}) -- l1_batch_number={:?} -- \ + expected source={source_value:?} to be equal to to_check={expected_value:?}", + self.l1_batch_number ); - self.metrics.get_enumeration_index_mismatch.inc(); } source_value } } - -// TODO: Add unit tests when we swap metrics crate; blocked by: https://linear.app/matterlabs/issue/QIT-3/rework-metrics-approach diff --git a/core/lib/state/src/storage_factory.rs b/core/lib/state/src/storage_factory.rs index 9f161cbeedf8..307fa465a7c9 100644 --- a/core/lib/state/src/storage_factory.rs +++ b/core/lib/state/src/storage_factory.rs @@ -30,7 +30,7 @@ pub trait ReadStorageFactory: Debug + Send + Sync + 'static { #[derive(Debug, Clone)] pub struct BatchDiff { /// Storage slots touched by this batch along with new values there. 
- pub state_diff: HashMap, + pub state_diff: HashMap, /// Initial write indices introduced by this batch. pub enum_index_diff: HashMap, /// Factory dependencies introduced by this batch. @@ -140,11 +140,12 @@ impl<'a> PgOrRocksdbStorage<'a> { impl ReadStorage for RocksdbWithMemory { fn read_value(&mut self, key: &StorageKey) -> StorageValue { + let hashed_key = key.hashed_key(); match self .batch_diffs .iter() .rev() - .find_map(|b| b.state_diff.get(key)) + .find_map(|b| b.state_diff.get(&hashed_key)) { None => self.rocksdb.read_value(key), Some(value) => *value, diff --git a/core/lib/state/src/storage_view.rs b/core/lib/state/src/storage_view.rs index 7756f6007eec..03962fdea13c 100644 --- a/core/lib/state/src/storage_view.rs +++ b/core/lib/state/src/storage_view.rs @@ -52,13 +52,6 @@ pub struct StorageView { metrics: StorageViewMetrics, } -impl StorageView { - /// Returns the modified storage keys - pub fn modified_storage_keys(&self) -> &HashMap { - &self.modified_storage_keys - } -} - impl ReadStorage for Box where S: ReadStorage + ?Sized, diff --git a/core/lib/state/src/test_utils.rs b/core/lib/state/src/test_utils.rs index 52febc5040ee..1d1731bf0015 100644 --- a/core/lib/state/src/test_utils.rs +++ b/core/lib/state/src/test_utils.rs @@ -118,6 +118,7 @@ pub(crate) async fn create_l1_batch( let mut written_keys: Vec<_> = logs_for_initial_writes.iter().map(|log| log.key).collect(); written_keys.sort_unstable(); + let written_keys: Vec<_> = written_keys.iter().map(StorageKey::hashed_key).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(l1_batch_number, &written_keys) .await @@ -154,6 +155,7 @@ pub(crate) async fn prepare_postgres_for_snapshot_recovery( .unwrap(); let mut written_keys: Vec<_> = snapshot_storage_logs.iter().map(|log| log.key).collect(); written_keys.sort_unstable(); + let written_keys: Vec<_> = written_keys.iter().map(StorageKey::hashed_key).collect(); conn.storage_logs_dedup_dal() .insert_initial_writes(snapshot_recovery.l1_batch_number, &written_keys) .await diff --git a/core/lib/types/src/proto/mod.proto b/core/lib/types/src/proto/mod.proto index 163215bb1237..1e2ae0ede515 100644 --- a/core/lib/types/src/proto/mod.proto +++ b/core/lib/types/src/proto/mod.proto @@ -7,8 +7,12 @@ message SnapshotStorageLogsChunk { } message SnapshotStorageLog { - optional bytes account_address = 1; // required; H160 - optional bytes storage_key = 2; // required; H256 + // `account_address` and `storage_key` fields are obsolete and are not used in the new snapshot format; + // `hashed_key` is used instead. The fields are retained for now to support recovery from old snapshots. + optional bytes account_address = 1; // optional; H160 + optional bytes storage_key = 2; // optional; H256 + optional bytes hashed_key = 6; // optional; H256 + optional bytes storage_value = 3; // required; H256 optional uint32 l1_batch_number_of_initial_write = 4; // required optional uint64 enumeration_index = 5; // required diff --git a/core/lib/types/src/snapshots.rs b/core/lib/types/src/snapshots.rs index 6e4f734a33c1..a29e5a91bf1e 100644 --- a/core/lib/types/src/snapshots.rs +++ b/core/lib/types/src/snapshots.rs @@ -25,6 +25,9 @@ pub struct AllSnapshots { pub enum SnapshotVersion { /// Initial snapshot version. Keys in storage logs are stored as `(address, key)` pairs. Version0 = 0, + /// Snapshot version made compatible with L1 recovery. Differs from `Version0` by including + /// hashed keys in storage logs instead of `(address, key)` pairs. + Version1 = 1, } /// Storage snapshot metadata. 
Used in DAL to fetch certain snapshot data. @@ -79,18 +82,33 @@ pub struct SnapshotStorageLogsStorageKey { } #[derive(Debug, Clone, PartialEq)] -pub struct SnapshotStorageLogsChunk { - pub storage_logs: Vec, +pub struct SnapshotStorageLogsChunk { + pub storage_logs: Vec>, } +/// Storage log record in a storage snapshot. +/// +/// Version 0 and version 1 snapshots differ in the key type; version 0 uses full [`StorageKey`]s (i.e., storage key preimages), +/// and version 1 uses [`H256`] hashed keys. See [`SnapshotVersion`] for details. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SnapshotStorageLog { - pub key: StorageKey, +pub struct SnapshotStorageLog { + pub key: K, pub value: StorageValue, pub l1_batch_number_of_initial_write: L1BatchNumber, pub enumeration_index: u64, } +impl SnapshotStorageLog { + pub fn drop_key_preimage(self) -> SnapshotStorageLog { + SnapshotStorageLog { + key: self.key.hashed_key(), + value: self.value, + l1_batch_number_of_initial_write: self.l1_batch_number_of_initial_write, + enumeration_index: self.enumeration_index, + } + } +} + #[derive(Debug, PartialEq)] pub struct SnapshotFactoryDependencies { pub factory_deps: Vec, @@ -144,17 +162,58 @@ impl ProtoFmt for SnapshotStorageLog { type Proto = crate::proto::SnapshotStorageLog; fn read(r: &Self::Proto) -> anyhow::Result { + let hashed_key = if let Some(hashed_key) = &r.hashed_key { + <[u8; 32]>::try_from(hashed_key.as_slice()) + .context("hashed_key")? + .into() + } else { + let address = required(&r.account_address) + .and_then(|bytes| Ok(<[u8; 20]>::try_from(bytes.as_slice())?.into())) + .context("account_address")?; + let key = required(&r.storage_key) + .and_then(|bytes| Ok(<[u8; 32]>::try_from(bytes.as_slice())?.into())) + .context("storage_key")?; + StorageKey::new(AccountTreeId::new(address), key).hashed_key() + }; + Ok(Self { - key: StorageKey::new( - AccountTreeId::new( - required(&r.account_address) - .and_then(|bytes| Ok(<[u8; 20]>::try_from(bytes.as_slice())?.into())) - .context("account_address")?, - ), - required(&r.storage_key) - .and_then(|bytes| Ok(<[u8; 32]>::try_from(bytes.as_slice())?.into())) - .context("storage_key")?, + key: hashed_key, + value: required(&r.storage_value) + .and_then(|bytes| Ok(<[u8; 32]>::try_from(bytes.as_slice())?.into())) + .context("storage_value")?, + l1_batch_number_of_initial_write: L1BatchNumber( + *required(&r.l1_batch_number_of_initial_write) + .context("l1_batch_number_of_initial_write")?, ), + enumeration_index: *required(&r.enumeration_index).context("enumeration_index")?, + }) + } + + fn build(&self) -> Self::Proto { + Self::Proto { + account_address: None, + storage_key: None, + hashed_key: Some(self.key.as_bytes().to_vec()), + storage_value: Some(self.value.as_bytes().to_vec()), + l1_batch_number_of_initial_write: Some(self.l1_batch_number_of_initial_write.0), + enumeration_index: Some(self.enumeration_index), + } + } +} + +impl ProtoFmt for SnapshotStorageLog { + type Proto = crate::proto::SnapshotStorageLog; + + fn read(r: &Self::Proto) -> anyhow::Result { + let address = required(&r.account_address) + .and_then(|bytes| Ok(<[u8; 20]>::try_from(bytes.as_slice())?.into())) + .context("account_address")?; + let key = required(&r.storage_key) + .and_then(|bytes| Ok(<[u8; 32]>::try_from(bytes.as_slice())?.into())) + .context("storage_key")?; + + Ok(Self { + key: StorageKey::new(AccountTreeId::new(address), key), value: required(&r.storage_value) .and_then(|bytes| Ok(<[u8; 32]>::try_from(bytes.as_slice())?.into())) .context("storage_value")?, @@ 
@@ -168,23 +227,27 @@ impl ProtoFmt for SnapshotStorageLog {
 
     fn build(&self) -> Self::Proto {
         Self::Proto {
-            account_address: Some(self.key.address().as_bytes().into()),
-            storage_key: Some(self.key.key().as_bytes().into()),
-            storage_value: Some(self.value.as_bytes().into()),
+            account_address: Some(self.key.address().as_bytes().to_vec()),
+            storage_key: Some(self.key.key().as_bytes().to_vec()),
+            hashed_key: None,
+            storage_value: Some(self.value.as_bytes().to_vec()),
             l1_batch_number_of_initial_write: Some(self.l1_batch_number_of_initial_write.0),
             enumeration_index: Some(self.enumeration_index),
         }
     }
 }
 
-impl ProtoFmt for SnapshotStorageLogsChunk {
+impl<K> ProtoFmt for SnapshotStorageLogsChunk<K>
+where
+    SnapshotStorageLog<K>: ProtoFmt<Proto = crate::proto::SnapshotStorageLog>,
+{
     type Proto = crate::proto::SnapshotStorageLogsChunk;
 
     fn read(r: &Self::Proto) -> anyhow::Result<Self> {
         let mut storage_logs = Vec::with_capacity(r.storage_logs.len());
         for (i, storage_log) in r.storage_logs.iter().enumerate() {
             storage_logs.push(
-                SnapshotStorageLog::read(storage_log)
+                SnapshotStorageLog::<K>::read(storage_log)
                     .with_context(|| format!("storage_log[{i}]"))?,
             )
         }
@@ -196,7 +259,7 @@ impl ProtoFmt for SnapshotStorageLogsChunk {
             storage_logs: self
                 .storage_logs
                 .iter()
-                .map(SnapshotStorageLog::<K>::build)
+                .map(SnapshotStorageLog::<K>::build)
                 .collect(),
         }
     }
diff --git a/core/node/api_server/src/execution_sandbox/apply.rs b/core/node/api_server/src/execution_sandbox/apply.rs
index e876a55b66f5..0d607311a445 100644
--- a/core/node/api_server/src/execution_sandbox/apply.rs
+++ b/core/node/api_server/src/execution_sandbox/apply.rs
@@ -366,7 +366,7 @@ impl StoredL2BlockInfo {
         );
         let l2_block_info = connection
             .storage_web3_dal()
-            .get_historical_value_unchecked(&l2_block_info_key, l2_block_number)
+            .get_historical_value_unchecked(l2_block_info_key.hashed_key(), l2_block_number)
             .await
             .context("failed reading L2 block info from VM state")?;
         let (l2_block_number_from_state, l2_block_timestamp) =
@@ -378,7 +378,10 @@ impl StoredL2BlockInfo {
         );
         let txs_rolling_hash = connection
             .storage_web3_dal()
-            .get_historical_value_unchecked(&l2_block_txs_rolling_hash_key, l2_block_number)
+            .get_historical_value_unchecked(
+                l2_block_txs_rolling_hash_key.hashed_key(),
+                l2_block_number,
+            )
             .await
             .context("failed reading transaction rolling hash from VM state")?;
diff --git a/core/node/api_server/src/web3/namespaces/eth.rs b/core/node/api_server/src/web3/namespaces/eth.rs
index 33dfa277dc1c..7b4710d1cd4a 100644
--- a/core/node/api_server/src/web3/namespaces/eth.rs
+++ b/core/node/api_server/src/web3/namespaces/eth.rs
@@ -407,7 +407,7 @@ impl EthNamespace {
         self.set_block_diff(block_number);
         let value = connection
             .storage_web3_dal()
-            .get_historical_value_unchecked(&storage_key, block_number)
+            .get_historical_value_unchecked(storage_key.hashed_key(), block_number)
             .await
             .map_err(DalError::generalize)?;
         Ok(value)
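The conditional chunk impl above uses a common Rust pattern: a generic container implements a trait only for those key types whose element type implements it. A standalone illustration with stand-in types (`Codec` plays the role of `ProtoFmt`; none of this is zksync code):

```rust
// A container is encodable exactly when its element type is.
trait Codec: Sized {
    fn encode(&self) -> String;
    fn decode(raw: &str) -> Option<Self>;
}

struct Log<K> {
    key: K,
}

struct Chunk<K = u64> {
    items: Vec<Log<K>>,
}

impl Codec for Log<u64> {
    fn encode(&self) -> String {
        self.key.to_string()
    }
    fn decode(raw: &str) -> Option<Self> {
        raw.parse().ok().map(|key| Log { key })
    }
}

// Written once; exists for every `K` with `Log<K>: Codec`, mirroring the
// `SnapshotStorageLog<K>: ProtoFmt` bound in the hunk above.
impl<K> Codec for Chunk<K>
where
    Log<K>: Codec,
{
    fn encode(&self) -> String {
        self.items.iter().map(|log| log.encode()).collect::<Vec<_>>().join(",")
    }
    fn decode(raw: &str) -> Option<Self> {
        raw.split(',')
            .map(Log::<K>::decode)
            .collect::<Option<Vec<_>>>()
            .map(|items| Chunk { items })
    }
}
```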
diff --git a/core/node/block_reverter/src/tests.rs b/core/node/block_reverter/src/tests.rs
index 7b989574b094..161ac3ed00c4 100644
--- a/core/node/block_reverter/src/tests.rs
+++ b/core/node/block_reverter/src/tests.rs
@@ -32,7 +32,11 @@ fn initialize_merkle_tree(path: &Path, storage_logs: &[StorageLog]) -> Vec<H256> {
     let mut tree = ZkSyncTree::new(db.into()).unwrap();
     let hashes = storage_logs.iter().enumerate().map(|(i, log)| {
         let output = tree
-            .process_l1_batch(&[TreeInstruction::write(log.key, i as u64 + 1, log.value)])
+            .process_l1_batch(&[TreeInstruction::write(
+                log.key.hashed_key_u256(),
+                i as u64 + 1,
+                log.value,
+            )])
             .unwrap();
         tree.save().unwrap();
         output.root_hash
@@ -101,7 +105,7 @@ async fn setup_storage(storage: &mut Connection<'_, Core>, storage_logs: &[StorageLog])
         .unwrap();
         storage
             .storage_logs_dedup_dal()
-            .insert_initial_writes(l1_batch_header.number, &[storage_log.key])
+            .insert_initial_writes(l1_batch_header.number, &[storage_log.key.hashed_key()])
             .await
             .unwrap();
     }
@@ -237,7 +241,7 @@ async fn create_mock_snapshot(
         let key = object_store
             .put(
                 key,
-                &SnapshotStorageLogsChunk {
+                &SnapshotStorageLogsChunk::<H256> {
                     storage_logs: vec![],
                 },
             )
diff --git a/core/node/commitment_generator/src/lib.rs b/core/node/commitment_generator/src/lib.rs
index 135aca361a0b..6dc1ef2d29fa 100644
--- a/core/node/commitment_generator/src/lib.rs
+++ b/core/node/commitment_generator/src/lib.rs
@@ -180,7 +180,7 @@ impl CommitmentGenerator {
         };
         let touched_slots = connection
             .storage_logs_dal()
-            .get_touched_slots_for_l1_batch(l1_batch_number)
+            .get_touched_slots_for_executed_l1_batch(l1_batch_number)
             .await?;
         let touched_hashed_keys: Vec<_> =
             touched_slots.keys().map(|key| key.hashed_key()).collect();
diff --git a/core/node/commitment_generator/src/tests.rs b/core/node/commitment_generator/src/tests.rs
index 29f17fa1646f..d857013a7699 100644
--- a/core/node/commitment_generator/src/tests.rs
+++ b/core/node/commitment_generator/src/tests.rs
@@ -31,7 +31,7 @@ async fn seal_l1_batch(storage: &mut Connection<'_, Core>, number: L1BatchNumber) {
     .unwrap();
     storage
         .storage_logs_dedup_dal()
-        .insert_initial_writes(number, &[storage_key])
+        .insert_initial_writes(number, &[storage_key.hashed_key()])
         .await
         .unwrap();
diff --git a/core/node/consensus/src/storage/testonly.rs b/core/node/consensus/src/storage/testonly.rs
index f5f30021b7c4..072ec9305266 100644
--- a/core/node/consensus/src/storage/testonly.rs
+++ b/core/node/consensus/src/storage/testonly.rs
@@ -34,10 +34,10 @@ impl ConnectionPool {
     ) -> ConnectionPool {
         match from_snapshot {
             true => {
-                ConnectionPool::from_snapshot(Snapshot::make(
+                ConnectionPool::from_snapshot(Snapshot::new(
                     L1BatchNumber(23),
                     L2BlockNumber(87),
-                    &[],
+                    vec![],
                     mock_genesis_params(protocol_version),
                 ))
                 .await
diff --git a/core/node/consensus/src/tests.rs b/core/node/consensus/src/tests.rs
index b16c66e478bb..5db6e250da6c 100644
--- a/core/node/consensus/src/tests.rs
+++ b/core/node/consensus/src/tests.rs
@@ -1,4 +1,3 @@
-#![allow(unused)]
 use anyhow::Context as _;
 use test_casing::{test_casing, Product};
 use tracing::Instrument as _;
@@ -10,9 +9,7 @@ use zksync_consensus_roles::{
     validator,
     validator::testonly::{Setup, SetupSpec},
 };
-use zksync_dal::CoreDal;
-use zksync_node_test_utils::Snapshot;
-use zksync_types::{L1BatchNumber, L2BlockNumber, ProtocolVersionId};
+use zksync_types::{L1BatchNumber, ProtocolVersionId};
 
 use super::*;
diff --git a/core/node/genesis/src/lib.rs b/core/node/genesis/src/lib.rs
index e1f15109bc0d..de0fc14b177b 100644
--- a/core/node/genesis/src/lib.rs
+++ b/core/node/genesis/src/lib.rs
@@ -220,12 +220,13 @@ pub async fn insert_genesis_batch(
         .into_iter()
         .partition(|log_query| log_query.rw_flag);
 
-    let storage_logs: Vec<TreeInstruction<StorageKey>> = deduplicated_writes
+    let storage_logs: Vec<TreeInstruction> = deduplicated_writes
         .iter()
         .enumerate()
         .map(|(index, log)| {
             TreeInstruction::write(
-                StorageKey::new(AccountTreeId::new(log.address), u256_to_h256(log.key)),
+                StorageKey::new(AccountTreeId::new(log.address), u256_to_h256(log.key))
+                    .hashed_key_u256(),
                 (index + 1) as u64,
                 u256_to_h256(log.written_value),
             )
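Genesis insertion now feeds the tree `hashed_key_u256()` values rather than full keys. A sketch of how the `H256` hashed key relates to the `U256` tree key; the little-endian conversion is an assumption, but it is the one `recover()` uses later in this diff:

```rust
use zksync_types::{AccountTreeId, Address, StorageKey, H256, U256};

fn main() {
    let key = StorageKey::new(AccountTreeId::new(Address::zero()), H256::zero());
    // The Merkle tree is keyed by the hashed key reinterpreted as a U256;
    // if both helpers share the little-endian convention, these agree.
    let tree_key = U256::from_little_endian(key.hashed_key().as_bytes());
    assert_eq!(tree_key, key.hashed_key_u256());
}
```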
diff --git a/core/node/genesis/src/utils.rs b/core/node/genesis/src/utils.rs
index af257b13bb70..a6c9513dbde8 100644
--- a/core/node/genesis/src/utils.rs
+++ b/core/node/genesis/src/utils.rs
@@ -199,7 +199,9 @@ pub(super) async fn insert_system_contracts(
     let written_storage_keys: Vec<_> = deduplicated_writes
         .iter()
-        .map(|log| StorageKey::new(AccountTreeId::new(log.address), u256_to_h256(log.key)))
+        .map(|log| {
+            StorageKey::new(AccountTreeId::new(log.address), u256_to_h256(log.key)).hashed_key()
+        })
         .collect();
     transaction
         .storage_logs_dedup_dal()
diff --git a/core/node/metadata_calculator/src/helpers.rs b/core/node/metadata_calculator/src/helpers.rs
index c71c0ecf9255..5e3c1f3d9d73 100644
--- a/core/node/metadata_calculator/src/helpers.rs
+++ b/core/node/metadata_calculator/src/helpers.rs
@@ -612,7 +612,7 @@ impl Delayer {
 #[cfg_attr(test, derive(PartialEq))]
 pub(crate) struct L1BatchWithLogs {
     pub header: L1BatchHeader,
-    pub storage_logs: Vec<TreeInstruction<StorageKey>>,
+    pub storage_logs: Vec<TreeInstruction>,
     mode: MerkleTreeMode,
 }
 
@@ -688,6 +688,7 @@ impl L1BatchWithLogs {
             writes
                 .chain(reads)
                 .sorted_by_key(|tree_instruction| tree_instruction.key())
+                .map(TreeInstruction::with_hashed_key)
                 .collect()
         } else {
             // Otherwise, load writes' data from other tables.
@@ -731,11 +732,11 @@ impl L1BatchWithLogs {
         connection: &mut Connection<'_, Core>,
         l1_batch_number: L1BatchNumber,
         protective_reads: HashSet<StorageKey>,
-    ) -> anyhow::Result<Vec<TreeInstruction<StorageKey>>> {
+    ) -> anyhow::Result<Vec<TreeInstruction>> {
         let touched_slots_latency = METRICS.start_load_stage(LoadChangesStage::LoadTouchedSlots);
         let mut touched_slots = connection
             .storage_logs_dal()
-            .get_touched_slots_for_l1_batch(l1_batch_number)
+            .get_touched_slots_for_executed_l1_batch(l1_batch_number)
             .await
             .context("cannot fetch touched slots")?;
         touched_slots_latency.observe_with_count(touched_slots.len());
@@ -758,7 +759,7 @@ impl L1BatchWithLogs {
             // their further processing. This is not a required step; the logic below works fine without it.
             // Indeed, extra no-op updates that could be added to `storage_logs` as a consequence of no filtering,
             // are removed on the Merkle tree level (see the tree domain wrapper).
-            let log = TreeInstruction::Read(storage_key);
+            let log = TreeInstruction::Read(storage_key.hashed_key_u256());
             storage_logs.insert(storage_key, log);
         }
         tracing::debug!(
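The `with_hashed_key` mapping above converts instructions keyed by `StorageKey` into instructions keyed by the tree's `U256`. A hypothetical mirror of that conversion with a mini enum (not the real `zksync_merkle_tree` type):

```rust
use zksync_types::{StorageKey, H256, U256};

// Stand-in for `TreeInstruction`: only the key type changes; payloads are kept.
#[derive(Debug)]
enum Instruction<K> {
    Read(K),
    Write(K, u64, H256),
}

impl Instruction<StorageKey> {
    fn with_hashed_key(self) -> Instruction<U256> {
        match self {
            Self::Read(key) => Instruction::Read(key.hashed_key_u256()),
            Self::Write(key, leaf_index, value) => {
                Instruction::Write(key.hashed_key_u256(), leaf_index, value)
            }
        }
    }
}
```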
@@ -774,7 +775,7 @@ impl L1BatchWithLogs {
                 if initial_write_batch_for_key <= l1_batch_number {
                     storage_logs.insert(
                         storage_key,
-                        TreeInstruction::write(storage_key, leaf_index, value),
+                        TreeInstruction::write(storage_key.hashed_key_u256(), leaf_index, value),
                     );
                 }
             }
@@ -786,11 +787,13 @@ impl L1BatchWithLogs {
 
 #[cfg(test)]
 mod tests {
+    use std::collections::HashMap;
+
     use tempfile::TempDir;
     use zksync_dal::{ConnectionPool, Core};
     use zksync_node_genesis::{insert_genesis_batch, GenesisParams};
     use zksync_prover_interface::inputs::PrepareBasicCircuitsJob;
-    use zksync_types::{writes::TreeWrite, StorageKey, StorageLog};
+    use zksync_types::{writes::TreeWrite, StorageKey, StorageLog, U256};
 
     use super::*;
     use crate::tests::{extend_db_state, gen_storage_logs, mock_config, reset_db_state};
@@ -813,7 +816,7 @@ mod tests {
         .unwrap();
         let touched_slots = storage
             .storage_logs_dal()
-            .get_touched_slots_for_l1_batch(l1_batch_number)
+            .get_touched_slots_for_executed_l1_batch(l1_batch_number)
             .await
             .unwrap();
 
@@ -845,7 +848,10 @@ mod tests {
                 );
             }
 
-            storage_logs.insert(storage_key, TreeInstruction::Read(storage_key));
+            storage_logs.insert(
+                storage_key,
+                TreeInstruction::Read(storage_key.hashed_key_u256()),
+            );
         }
 
         for (storage_key, value) in touched_slots {
@@ -854,7 +860,7 @@ mod tests {
                 let (_, leaf_index) = l1_batches_for_initial_writes[&storage_key.hashed_key()];
                 storage_logs.insert(
                     storage_key,
-                    TreeInstruction::write(storage_key, leaf_index, value),
+                    TreeInstruction::write(storage_key.hashed_key_u256(), leaf_index, value),
                 );
             }
         }
@@ -881,6 +887,19 @@ mod tests {
         let mut storage = pool.connection().await.unwrap();
         let mut tree_writes = Vec::new();
 
+        // Create a lookup table for storage key preimages
+        let all_storage_logs = storage
+            .storage_logs_dal()
+            .dump_all_storage_logs_for_tests()
+            .await;
+        let logs_by_hashed_key: HashMap<_, _> = all_storage_logs
+            .into_iter()
+            .map(|log| {
+                let tree_key = U256::from_little_endian(log.hashed_key.as_bytes());
+                (tree_key, log)
+            })
+            .collect();
+
         // Check equivalence in case `tree_writes` are not present in DB.
         for l1_batch_number in 0..=5 {
             let l1_batch_number = L1BatchNumber(l1_batch_number);
@@ -899,8 +918,8 @@ mod tests {
                 .into_iter()
                 .filter_map(|instruction| match instruction {
                     TreeInstruction::Write(tree_entry) => Some(TreeWrite {
-                        address: *tree_entry.key.address(),
-                        key: *tree_entry.key.key(),
+                        address: logs_by_hashed_key[&tree_entry.key].address.unwrap(),
+                        key: logs_by_hashed_key[&tree_entry.key].key.unwrap(),
                        value: tree_entry.value,
                        leaf_index: tree_entry.leaf_index,
                    }),
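Since hashing is one-way, tests that still need `(address, key)` preimages keep a side table, as in the hunk above. A condensed sketch of the pattern (illustrative values):

```rust
use std::collections::HashMap;
use zksync_types::{AccountTreeId, Address, StorageKey, H256, U256};

fn main() {
    let preimage = StorageKey::new(AccountTreeId::new(Address::repeat_byte(0xfe)), H256::zero());

    // Map from the tree key (U256 form of the hashed key) back to the preimage.
    let mut preimages_by_tree_key: HashMap<U256, StorageKey> = HashMap::new();
    preimages_by_tree_key.insert(preimage.hashed_key_u256(), preimage);

    // Later, given only a tree entry's key, the preimage can be recovered:
    let recovered = preimages_by_tree_key[&preimage.hashed_key_u256()];
    assert_eq!(*recovered.address(), Address::repeat_byte(0xfe));
}
```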
diff --git a/core/node/metadata_calculator/src/recovery/tests.rs b/core/node/metadata_calculator/src/recovery/tests.rs
index dc333a30fa2e..3861e8a5a84e 100644
--- a/core/node/metadata_calculator/src/recovery/tests.rs
+++ b/core/node/metadata_calculator/src/recovery/tests.rs
@@ -102,7 +102,7 @@ async fn prepare_recovery_snapshot_with_genesis(
     // Add all logs from the genesis L1 batch to `logs` so that they cover all state keys.
     let genesis_logs = storage
         .storage_logs_dal()
-        .get_touched_slots_for_l1_batch(L1BatchNumber(0))
+        .get_touched_slots_for_executed_l1_batch(L1BatchNumber(0))
         .await
         .unwrap();
     let genesis_logs = genesis_logs
@@ -362,7 +362,9 @@ async fn entire_recovery_workflow(case: RecoveryWorkflowCase) {
         .iter()
         .chain(&new_logs)
         .enumerate()
-        .map(|(i, log)| TreeInstruction::write(log.key, i as u64 + 1, log.value))
+        .map(|(i, log)| {
+            TreeInstruction::write(log.key.hashed_key_u256(), i as u64 + 1, log.value)
+        })
         .collect();
     let expected_new_root_hash =
         ZkSyncTree::process_genesis_batch(&all_tree_instructions).root_hash;
diff --git a/core/node/metadata_calculator/src/tests.rs b/core/node/metadata_calculator/src/tests.rs
index d462511829dc..c5a00ecd7563 100644
--- a/core/node/metadata_calculator/src/tests.rs
+++ b/core/node/metadata_calculator/src/tests.rs
@@ -825,7 +825,7 @@ async fn insert_initial_writes_for_batch(
 ) {
     let written_non_zero_slots: Vec<_> = connection
         .storage_logs_dal()
-        .get_touched_slots_for_l1_batch(l1_batch_number)
+        .get_touched_slots_for_executed_l1_batch(l1_batch_number)
         .await
         .unwrap()
         .into_iter()
@@ -845,6 +845,7 @@ async fn insert_initial_writes_for_batch(
         .into_iter()
         .sorted()
         .filter(|key| !pre_written_slots.contains(&key.hashed_key()))
+        .map(|key| key.hashed_key())
         .collect();
     connection
         .storage_logs_dedup_dal()
diff --git a/core/node/node_sync/src/tree_data_fetcher/tests.rs b/core/node/node_sync/src/tree_data_fetcher/tests.rs
index 5d94ddf658d6..5cb8b9241b24 100644
--- a/core/node/node_sync/src/tree_data_fetcher/tests.rs
+++ b/core/node/node_sync/src/tree_data_fetcher/tests.rs
@@ -85,7 +85,8 @@ pub(super) async fn seal_l1_batch_with_timestamp(
     let initial_writes = [StorageKey::new(
         AccountTreeId::new(Address::repeat_byte(1)),
         H256::from_low_u64_be(number.0.into()),
-    )];
+    )
+    .hashed_key()];
     transaction
         .storage_logs_dedup_dal()
         .insert_initial_writes(number, &initial_writes)
diff --git a/core/node/state_keeper/src/batch_executor/tests/mod.rs b/core/node/state_keeper/src/batch_executor/tests/mod.rs
index c2196a7b6b28..4b36965895fd 100644
--- a/core/node/state_keeper/src/batch_executor/tests/mod.rs
+++ b/core/node/state_keeper/src/batch_executor/tests/mod.rs
@@ -69,12 +69,12 @@ impl SnapshotRecoveryMutation {
     fn mutate_snapshot(self, storage_snapshot: &mut StorageSnapshot, alice: &Account) {
         match self {
             Self::RemoveNonce => {
-                let nonce_key = get_nonce_key(&alice.address());
+                let nonce_key = get_nonce_key(&alice.address()).hashed_key();
                 let nonce_value = storage_snapshot.storage_logs.remove(&nonce_key);
                 assert!(nonce_value.is_some());
             }
             Self::RemoveBalance => {
-                let balance_key = storage_key_for_eth_balance(&alice.address());
+                let balance_key = storage_key_for_eth_balance(&alice.address()).hashed_key();
                 let balance_value = storage_snapshot.storage_logs.remove(&balance_key);
                 assert!(balance_value.is_some());
             }
@@ -82,8 +82,8 @@ impl SnapshotRecoveryMutation {
     }
 }
 
-const EXECUTE_L2_TX_AFTER_SNAPSHOT_RECOVERY_CASES: test_casing::Product<(
-    [std::option::Option<SnapshotRecoveryMutation>; 3],
+const EXECUTE_L2_TX_AFTER_SNAPSHOT_RECOVERY_CASES: Product<(
+    [Option<SnapshotRecoveryMutation>; 3],
     [StorageType; 3],
 )> = Product((SnapshotRecoveryMutation::ALL, StorageType::ALL));
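The `Product` constant above drives the `test_casing` crate's cartesian test generation (3 mutations x 3 storage types = 9 cases). A hedged sketch of that usage pattern, based on my reading of the crate's documented API; names are illustrative:

```rust
use test_casing::{test_casing, Product};

// 3 numbers x 2 storage kinds = 6 generated test cases.
const CASES: Product<([u32; 3], [&str; 2])> = Product(([1, 2, 3], ["rocksdb", "postgres"]));

#[test_casing(6, CASES)]
#[test]
fn works_for_all_cases(number: u32, storage: &str) {
    assert!(number > 0 && !storage.is_empty());
}
```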
diff --git a/core/node/state_keeper/src/batch_executor/tests/tester.rs b/core/node/state_keeper/src/batch_executor/tests/tester.rs
index 91ff05357931..579f3bee4819 100644
--- a/core/node/state_keeper/src/batch_executor/tests/tester.rs
+++ b/core/node/state_keeper/src/batch_executor/tests/tester.rs
@@ -12,16 +12,20 @@ use zksync_multivm::{
     interface::{L1BatchEnv, L2BlockEnv, SystemEnv},
     vm_latest::constants::INITIAL_STORAGE_WRITE_PUBDATA_BYTES,
 };
-use zksync_node_genesis::create_genesis_l1_batch;
-use zksync_node_test_utils::prepare_recovery_snapshot;
+use zksync_node_genesis::{create_genesis_l1_batch, GenesisParams};
+use zksync_node_test_utils::{recover, Snapshot};
 use zksync_state::{ReadStorageFactory, RocksdbStorageOptions};
 use zksync_test_account::{Account, DeployContractsTx, TxType};
 use zksync_types::{
-    block::L2BlockHasher, ethabi::Token, protocol_version::ProtocolSemanticVersion,
-    snapshots::SnapshotRecoveryStatus, storage_writes_deduplicator::StorageWritesDeduplicator,
-    system_contracts::get_system_smart_contracts, utils::storage_key_for_standard_token_balance,
+    block::L2BlockHasher,
+    ethabi::Token,
+    protocol_version::ProtocolSemanticVersion,
+    snapshots::{SnapshotRecoveryStatus, SnapshotStorageLog},
+    storage_writes_deduplicator::StorageWritesDeduplicator,
+    system_contracts::get_system_smart_contracts,
+    utils::storage_key_for_standard_token_balance,
     AccountTreeId, Address, Execute, L1BatchNumber, L2BlockNumber, PriorityOpId, ProtocolVersionId,
-    StorageKey, StorageLog, Transaction, H256, L2_BASE_TOKEN_ADDRESS, U256,
+    StorageLog, Transaction, H256, L2_BASE_TOKEN_ADDRESS, U256,
 };
 use zksync_utils::u256_to_h256;
 
@@ -284,7 +288,7 @@ impl Tester {
         {
             storage
                 .storage_logs_dedup_dal()
-                .insert_initial_writes(L1BatchNumber(0), &[storage_log.key])
+                .insert_initial_writes(L1BatchNumber(0), &[storage_log.key.hashed_key()])
                 .await
                 .unwrap();
         }
@@ -433,7 +437,7 @@ pub(super) struct StorageSnapshot {
     pub l2_block_number: L2BlockNumber,
     pub l2_block_hash: H256,
     pub l2_block_timestamp: u64,
-    pub storage_logs: HashMap<StorageKey, H256>,
+    pub storage_logs: HashMap<H256, H256>,
     pub factory_deps: HashMap<H256, Vec<u8>>,
 }
 
@@ -512,7 +516,7 @@ impl StorageSnapshot {
         all_logs.extend(
             modified_entries
                 .into_iter()
                .map(|(key, slot)| (key.hashed_key(), slot.value)),
        );
 
        // Compute the hash of the last (fictive) L2 block in the batch.
@@ -539,17 +543,23 @@ impl StorageSnapshot {
         let snapshot_logs: Vec<_> = self
             .storage_logs
             .into_iter()
-            .map(|(key, value)| StorageLog::new_write_log(key, value))
+            .enumerate()
+            .map(|(i, (key, value))| SnapshotStorageLog {
+                key,
+                value,
+                l1_batch_number_of_initial_write: L1BatchNumber(1),
+                enumeration_index: i as u64 + 1,
+            })
             .collect();
         let mut storage = connection_pool.connection().await.unwrap();
-        let mut snapshot = prepare_recovery_snapshot(
-            &mut storage,
+
+        let snapshot = Snapshot::new(
             L1BatchNumber(1),
             self.l2_block_number,
-            &snapshot_logs,
-        )
-        .await;
-
+            snapshot_logs,
+            GenesisParams::mock(),
+        );
+        let mut snapshot = recover(&mut storage, snapshot).await;
         snapshot.l2_block_hash = self.l2_block_hash;
         snapshot.l2_block_timestamp = self.l2_block_timestamp;
diff --git a/core/node/state_keeper/src/io/seal_logic/mod.rs b/core/node/state_keeper/src/io/seal_logic/mod.rs
index e998317f726a..92630015f2a2 100644
--- a/core/node/state_keeper/src/io/seal_logic/mod.rs
+++ b/core/node/state_keeper/src/io/seal_logic/mod.rs
@@ -22,8 +22,8 @@ use zksync_types::{
         TransactionExecutionResult,
     },
     utils::display_timestamp,
-    AccountTreeId, Address, ExecuteTransactionCommon, ProtocolVersionId, StorageKey, StorageLog,
-    Transaction, VmEvent, H256,
+    Address, ExecuteTransactionCommon, ProtocolVersionId, StorageKey, StorageLog, Transaction,
+    VmEvent, H256,
 };
 use zksync_utils::u256_to_h256;
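The next hunk reworks initial-write detection during batch sealing. In essence: hash every deduplicated write, ask the DAL which hashes were already written, keep the rest. A condensed model with stand-in types (the real code obtains `already_written` from `filter_written_slots`):

```rust
use std::collections::HashSet;

type HashedKey = [u8; 32];

/// Sketch only: keys whose hashes are unseen so far are the batch's initial writes.
fn filter_initial_writes(
    deduplicated_write_keys: impl Iterator<Item = HashedKey>,
    already_written: &HashSet<HashedKey>,
) -> Vec<HashedKey> {
    deduplicated_write_keys
        .filter(|hashed_key| !already_written.contains(hashed_key))
        .collect()
}
```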
.collect(), + all_writes_len, + ) + }; progress.observe(all_writes_len); let progress = L1_BATCH_METRICS.start(L1BatchSealStage::InsertInitialWrites); diff --git a/core/node/state_keeper/src/io/tests/mod.rs b/core/node/state_keeper/src/io/tests/mod.rs index 943ecfc2ad79..7c70607c763b 100644 --- a/core/node/state_keeper/src/io/tests/mod.rs +++ b/core/node/state_keeper/src/io/tests/mod.rs @@ -311,7 +311,7 @@ async fn processing_storage_logs_when_sealing_l2_block() { // Keys that are only read must not be written to `storage_logs`. let account = AccountTreeId::default(); let read_key = StorageKey::new(account, H256::from_low_u64_be(1)); - assert!(!touched_slots.contains_key(&read_key)); + assert!(!touched_slots.contains_key(&read_key.hashed_key())); // The storage logs must be inserted and read in the correct order, so that // `touched_slots` contain the most recent values in the L1 batch. @@ -320,7 +320,7 @@ async fn processing_storage_logs_when_sealing_l2_block() { for (key, value) in written_kvs { let key = StorageKey::new(account, H256::from_low_u64_be(key)); let expected_value = H256::from_low_u64_be(value); - assert_eq!(touched_slots[&key], expected_value); + assert_eq!(touched_slots[&key.hashed_key()], expected_value); } } diff --git a/core/node/state_keeper/src/testonly/mod.rs b/core/node/state_keeper/src/testonly/mod.rs index 965c3c0f05ca..940e4c19c4be 100644 --- a/core/node/state_keeper/src/testonly/mod.rs +++ b/core/node/state_keeper/src/testonly/mod.rs @@ -142,7 +142,7 @@ pub async fn fund(pool: &ConnectionPool, addresses: &[Address]) { { storage .storage_logs_dedup_dal() - .insert_initial_writes(L1BatchNumber(0), &[storage_log.key]) + .insert_initial_writes(L1BatchNumber(0), &[storage_log.key.hashed_key()]) .await .unwrap(); } diff --git a/core/node/test_utils/src/lib.rs b/core/node/test_utils/src/lib.rs index 6b3082abb35e..ee3503322aea 100644 --- a/core/node/test_utils/src/lib.rs +++ b/core/node/test_utils/src/lib.rs @@ -18,7 +18,7 @@ use zksync_types::{ fee_model::BatchFeeInput, l2::L2Tx, protocol_version::ProtocolSemanticVersion, - snapshots::SnapshotRecoveryStatus, + snapshots::{SnapshotRecoveryStatus, SnapshotStorageLog}, transaction_request::PaymasterParams, tx::{tx_execution_info::TxExecutionStatus, ExecutionMetrics, TransactionExecutionResult}, Address, K256PrivateKey, L1BatchNumber, L2BlockNumber, L2ChainId, Nonce, ProtocolVersion, @@ -154,16 +154,16 @@ pub fn execute_l2_transaction(transaction: L2Tx) -> TransactionExecutionResult { pub struct Snapshot { pub l1_batch: L1BatchHeader, pub l2_block: L2BlockHeader, - pub storage_logs: Vec, + pub storage_logs: Vec, pub factory_deps: HashMap>, } impl Snapshot { // Constructs a dummy Snapshot based on the provided values. 
diff --git a/core/node/test_utils/src/lib.rs b/core/node/test_utils/src/lib.rs
index 6b3082abb35e..ee3503322aea 100644
--- a/core/node/test_utils/src/lib.rs
+++ b/core/node/test_utils/src/lib.rs
@@ -18,7 +18,7 @@ use zksync_types::{
     fee_model::BatchFeeInput,
     l2::L2Tx,
     protocol_version::ProtocolSemanticVersion,
-    snapshots::SnapshotRecoveryStatus,
+    snapshots::{SnapshotRecoveryStatus, SnapshotStorageLog},
     transaction_request::PaymasterParams,
     tx::{tx_execution_info::TxExecutionStatus, ExecutionMetrics, TransactionExecutionResult},
     Address, K256PrivateKey, L1BatchNumber, L2BlockNumber, L2ChainId, Nonce, ProtocolVersion,
@@ -154,16 +154,16 @@ pub fn execute_l2_transaction(transaction: L2Tx) -> TransactionExecutionResult {
 pub struct Snapshot {
     pub l1_batch: L1BatchHeader,
     pub l2_block: L2BlockHeader,
-    pub storage_logs: Vec<StorageLog>,
+    pub storage_logs: Vec<SnapshotStorageLog>,
     pub factory_deps: HashMap<H256, Vec<u8>>,
 }
 
 impl Snapshot {
     // Constructs a dummy Snapshot based on the provided values.
-    pub fn make(
+    pub fn new(
         l1_batch: L1BatchNumber,
         l2_block: L2BlockNumber,
-        storage_logs: &[StorageLog],
+        storage_logs: Vec<SnapshotStorageLog>,
         genesis_params: GenesisParams,
     ) -> Self {
         let contracts = genesis_params.base_system_contracts();
@@ -197,7 +197,7 @@ impl Snapshot {
                 .into_iter()
                 .map(|c| (c.hash, zksync_utils::be_words_to_bytes(&c.code)))
                 .collect(),
-            storage_logs: storage_logs.to_vec(),
+            storage_logs,
         }
     }
 }
@@ -209,11 +209,18 @@ pub async fn prepare_recovery_snapshot(
     l2_block: L2BlockNumber,
     storage_logs: &[StorageLog],
 ) -> SnapshotRecoveryStatus {
-    recover(
-        storage,
-        Snapshot::make(l1_batch, l2_block, storage_logs, GenesisParams::mock()),
-    )
-    .await
+    let storage_logs = storage_logs
+        .iter()
+        .enumerate()
+        .map(|(i, log)| SnapshotStorageLog {
+            key: log.key.hashed_key(),
+            value: log.value,
+            l1_batch_number_of_initial_write: l1_batch,
+            enumeration_index: i as u64 + 1,
+        })
+        .collect();
+    let snapshot = Snapshot::new(l1_batch, l2_block, storage_logs, GenesisParams::mock());
+    recover(storage, snapshot).await
 }
 
 /// Takes a storage snapshot at the last sealed L1 batch.
@@ -248,10 +255,7 @@ pub async fn snapshot(storage: &mut Connection<'_, Core>) -> Snapshot {
             .snapshots_creator_dal()
             .get_storage_logs_chunk(l2_block, l1_batch.number, all_hashes)
             .await
-            .unwrap()
-            .into_iter()
-            .map(|l| StorageLog::new_write_log(l.key, l.value))
-            .collect(),
+            .unwrap(),
         factory_deps: storage
             .snapshots_creator_dal()
             .get_all_factory_deps(l2_block)
@@ -274,8 +278,10 @@ pub async fn recover(
     let tree_instructions: Vec<_> = snapshot
         .storage_logs
         .iter()
-        .enumerate()
-        .map(|(i, log)| TreeInstruction::write(log.key, i as u64 + 1, log.value))
+        .map(|log| {
+            let tree_key = U256::from_little_endian(log.key.as_bytes());
+            TreeInstruction::write(tree_key, log.enumeration_index, log.value)
+        })
         .collect();
     let l1_batch_root_hash = ZkSyncTree::process_genesis_batch(&tree_instructions).root_hash;
 
@@ -317,7 +323,7 @@ pub async fn recover(
         .unwrap();
     storage
         .storage_logs_dal()
-        .insert_storage_logs(snapshot.l2_block.number, &snapshot.storage_logs)
+        .insert_storage_logs_from_snapshot(snapshot.l2_block.number, &snapshot.storage_logs)
         .await
         .unwrap();
diff --git a/core/node/vm_runner/src/tests/mod.rs b/core/node/vm_runner/src/tests/mod.rs
index 52c4db4bb486..c592122b1e09 100644
--- a/core/node/vm_runner/src/tests/mod.rs
+++ b/core/node/vm_runner/src/tests/mod.rs
@@ -233,7 +233,7 @@ async fn store_l1_batches(
     for _ in 0..10 {
         let key = StorageKey::new(AccountTreeId::new(H160::random()), H256::random());
         let value = StorageValue::random();
-        written_keys.push(key);
+        written_keys.push(key.hashed_key());
         logs.push(StorageLog {
             kind: StorageLogKind::RepeatedWrite,
             key,
@@ -354,7 +354,7 @@ async fn fund(pool: &ConnectionPool<Core>, accounts: &[Account]) {
         .is_empty()
     {
         conn.storage_logs_dedup_dal()
-            .insert_initial_writes(L1BatchNumber(0), &[storage_log.key])
+            .insert_initial_writes(L1BatchNumber(0), &[storage_log.key.hashed_key()])
            .await
            .unwrap();
    }
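A key simplification in `recover()` above: a version-1 snapshot log already carries its leaf index (`enumeration_index`), so no re-enumeration is needed when rebuilding the tree. A sketch of that conversion, assuming `TreeInstruction` is importable from `zksync_merkle_tree` as these helpers do:

```rust
use zksync_merkle_tree::TreeInstruction;
use zksync_types::{snapshots::SnapshotStorageLog, U256};

// One snapshot log maps to one tree write at its recorded leaf index.
fn to_tree_instruction(log: &SnapshotStorageLog) -> TreeInstruction {
    let tree_key = U256::from_little_endian(log.key.as_bytes());
    TreeInstruction::write(tree_key, log.enumeration_index, log.value)
}
```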
diff --git a/core/node/vm_runner/src/tests/storage.rs b/core/node/vm_runner/src/tests/storage.rs
index afeaac8a8364..52de43801ff0 100644
--- a/core/node/vm_runner/src/tests/storage.rs
+++ b/core/node/vm_runner/src/tests/storage.rs
@@ -338,8 +338,10 @@ async fn access_vm_runner_storage() -> anyhow::Result<()> {
         })?;
         // Check that both storages have identical key-value pairs written in them
         for storage_log in &storage_logs {
-            let storage_key =
-                StorageKey::new(AccountTreeId::new(storage_log.address), storage_log.key);
+            let storage_key = StorageKey::new(
+                AccountTreeId::new(storage_log.address.unwrap()),
+                storage_log.key.unwrap(),
+            );
             assert_eq!(
                 pg_storage.read_value(&storage_key),
                 vm_storage.read_value(&storage_key)
diff --git a/core/tests/recovery-test/tests/snapshot-recovery.test.ts b/core/tests/recovery-test/tests/snapshot-recovery.test.ts
index 3a5d3b7ef57c..30ef55fa8621 100644
--- a/core/tests/recovery-test/tests/snapshot-recovery.test.ts
+++ b/core/tests/recovery-test/tests/snapshot-recovery.test.ts
@@ -22,6 +22,7 @@ interface AllSnapshotsResponse {
 }
 
 interface GetSnapshotResponse {
+    readonly version: number;
     readonly miniblockNumber: number;
     readonly l1BatchNumber: number;
     readonly storageLogsChunks: Array;
@@ -138,6 +139,7 @@ describe('snapshot recovery', () => {
         const l1BatchNumber = Math.max(...newBatchNumbers);
         snapshotMetadata = await getSnapshot(l1BatchNumber);
         console.log('Obtained latest snapshot', snapshotMetadata);
+        expect(snapshotMetadata.version).to.be.oneOf([0, 1]);
         const l2BlockNumber = snapshotMetadata.miniblockNumber;
 
         const protoPath = path.join(homeDir, 'core/lib/types/src/proto/mod.proto');
@@ -160,17 +162,20 @@ describe('snapshot recovery', () => {
             }
             sampledCount++;
 
-            const snapshotAccountAddress = '0x' + storageLog.accountAddress.toString('hex');
-            const snapshotKey = '0x' + storageLog.storageKey.toString('hex');
-            const snapshotValue = '0x' + storageLog.storageValue.toString('hex');
             const snapshotL1BatchNumber = storageLog.l1BatchNumberOfInitialWrite;
-            const valueOnBlockchain = await mainNode.getStorageAt(
-                snapshotAccountAddress,
-                snapshotKey,
-                l2BlockNumber
-            );
-            expect(snapshotValue).to.equal(valueOnBlockchain);
             expect(snapshotL1BatchNumber).to.be.lessThanOrEqual(l1BatchNumber);
+
+            if (snapshotMetadata.version === 0) {
+                const snapshotAccountAddress = '0x' + storageLog.accountAddress.toString('hex');
+                const snapshotKey = '0x' + storageLog.storageKey.toString('hex');
+                const snapshotValue = '0x' + storageLog.storageValue.toString('hex');
+                const valueOnBlockchain = await mainNode.getStorageAt(
+                    snapshotAccountAddress,
+                    snapshotKey,
+                    l2BlockNumber
+                );
+                expect(snapshotValue).to.equal(valueOnBlockchain);
+            }
         }
         console.log(`Checked random ${sampledCount} logs in the chunk`);
     }
diff --git a/etc/env/file_based/general.yaml b/etc/env/file_based/general.yaml
index 8e435a77b736..f2733d5d1ee4 100644
--- a/etc/env/file_based/general.yaml
+++ b/etc/env/file_based/general.yaml
@@ -342,6 +342,7 @@ snapshot_recovery:
     chunk_size: 200000
   experimental:
     tree_recovery_parallel_persistence_buffer: 1
+    drop_storage_key_preimages: true
 pruning:
   enabled: true
   chunk_size: 10
@@ -351,7 +352,6 @@ pruning:
 
 commitment_generator:
   max_parallelism: 10
-
 core_object_store:
   file_backed:
    file_backed_base_path: artifacts
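For reference, the new `drop_storage_key_preimages` knob under `snapshot_recovery.experimental` deserializes like any other optional boolean flag. A hedged sketch of a deserialization target for that subsection; only the field names are taken from this diff, the struct itself is hypothetical:

```rust
use serde::Deserialize;

// Hypothetical stand-in for the experimental recovery config section above.
#[derive(Debug, Default, Deserialize)]
struct ExperimentalRecoveryConfig {
    #[serde(default)]
    tree_recovery_parallel_persistence_buffer: Option<usize>,
    /// When `true`, storage logs recovered from a version-0 snapshot are stored without
    /// their `(address, key)` preimages, as if they came from a version-1 snapshot.
    #[serde(default)]
    drop_storage_key_preimages: bool,
}

fn main() {
    let yaml = "drop_storage_key_preimages: true\n";
    let config: ExperimentalRecoveryConfig = serde_yaml::from_str(yaml).unwrap();
    assert!(config.drop_storage_key_preimages);
}
```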