Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(snapshots_creator): Remove snapshots during reverts #1757

Merged
merged 18 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 7 additions & 22 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ axum = "0.6.19"
backon = "0.4.4"
bigdecimal = "0.3.0"
bincode = "1"
bitflags = "1.3.2"
blake2 = "0.10"
chrono = "0.4"
clap = "4.2.2"
Expand Down Expand Up @@ -140,7 +139,6 @@ sqlx = "0.7.3"
static_assertions = "1.1"
structopt = "0.3.20"
strum = "0.24"
tempdir = "0.3.7"
tempfile = "3.0.2"
test-casing = "0.1.2"
test-log = "0.2.15"
Expand Down
1 change: 1 addition & 0 deletions core/bin/block_reverter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ publish = false
zksync_config.workspace = true
zksync_env_config.workspace = true
zksync_dal.workspace = true
zksync_object_store.workspace = true
zksync_types.workspace = true
zksync_block_reverter.workspace = true
vlog.workspace = true
Expand Down
100 changes: 54 additions & 46 deletions core/bin/block_reverter/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
use std::env;

use anyhow::Context as _;
use clap::{Parser, Subcommand};
use tokio::io::{self, AsyncReadExt};
use zksync_block_reverter::{
BlockReverter, BlockReverterEthConfig, BlockReverterFlags, L1ExecutedBatchesRevert, NodeRole,
};
use zksync_block_reverter::{BlockReverter, BlockReverterEthConfig, NodeRole};
use zksync_config::{
configs::ObservabilityConfig, ContractsConfig, DBConfig, EthConfig, PostgresConfig,
configs::{chain::NetworkConfig, ObservabilityConfig},
ContractsConfig, DBConfig, EthConfig, PostgresConfig,
};
use zksync_dal::{ConnectionPool, Core};
use zksync_env_config::FromEnv;
use zksync_env_config::{object_store::SnapshotsObjectStoreConfig, FromEnv};
use zksync_object_store::ObjectStoreFactory;
use zksync_types::{Address, L1BatchNumber, U256};

#[derive(Debug, Parser)]
Expand All @@ -33,23 +35,23 @@ enum Command {
/// Sends revert transaction to L1.
#[command(name = "send-eth-transaction")]
SendEthTransaction {
/// L1 batch number used to rollback to.
/// L1 batch number used to revert to.
slowli marked this conversation as resolved.
Show resolved Hide resolved
#[arg(long)]
l1_batch_number: u32,
/// Priority fee used for rollback Ethereum transaction.
/// Priority fee used for the reverting Ethereum transaction.
// We operate only by priority fee because we want to use base fee from Ethereum
// and send transaction as soon as possible without any resend logic
#[arg(long)]
priority_fee_per_gas: Option<u64>,
/// Nonce used for rollback Ethereum transaction.
/// Nonce used for reverting Ethereum transaction.
#[arg(long)]
nonce: u64,
},

/// Reverts internal database state to previous block.
/// Rolls back internal database state to a previous L1 batch.
#[command(name = "rollback-db")]
RollbackDB {
/// L1 batch number used to rollback to.
/// L1 batch number used to roll back to.
#[arg(long)]
l1_batch_number: u32,
/// Flag that specifies if Postgres DB should be rolled back.
Expand All @@ -61,7 +63,7 @@ enum Command {
/// Flag that specifies if RocksDB with state keeper cache should be rolled back.
#[arg(long)]
rollback_sk_cache: bool,
/// Flag that allows to revert already executed blocks, it's ultra dangerous and required only for fixing external nodes
/// Flag that allows to roll back already executed blocks. It's ultra dangerous and required only for fixing external nodes.
#[arg(long)]
allow_executed_block_reversion: bool,
},
Expand Down Expand Up @@ -99,16 +101,16 @@ async fn main() -> anyhow::Result<()> {
.default_priority_fee_per_gas,
);
let contracts = ContractsConfig::from_env().context("ContractsConfig::from_env()")?;
// FIXME: is it correct to parse the entire `NetworkConfig`?
slowli marked this conversation as resolved.
Show resolved Hide resolved
let network = NetworkConfig::from_env().context("NetworkConfig::from_env()")?;
let postgres_config = PostgresConfig::from_env().context("PostgresConfig::from_env()")?;
let operator_address = if let Command::Display {
operator_address, ..
} = &command
{
Some(operator_address)
} else {
None
};
let config = BlockReverterEthConfig::new(eth_sender, contracts, operator_address.copied());
let era_chain_id = env::var("CONTRACTS_ERA_CHAIN_ID")
.context("`CONTRACTS_ERA_CHAIN_ID` env variable is not set")?
.parse()
.map_err(|err| {
anyhow::anyhow!("failed parsing `CONTRACTS_ERA_CHAIN_ID` env variable: {err}")
})?;
let config = BlockReverterEthConfig::new(eth_sender, &contracts, &network, era_chain_id)?;
slowli marked this conversation as resolved.
Show resolved Hide resolved

let connection_pool = ConnectionPool::<Core>::builder(
postgres_config.master_url()?,
Expand All @@ -117,22 +119,20 @@ async fn main() -> anyhow::Result<()> {
.build()
.await
.context("failed to build a connection pool")?;
let mut block_reverter = BlockReverter::new(
NodeRole::Main,
db_config.state_keeper_db_path,
db_config.merkle_tree.path,
Some(config),
connection_pool,
L1ExecutedBatchesRevert::Disallowed,
);
let mut block_reverter = BlockReverter::new(NodeRole::Main, connection_pool);

match command {
Command::Display { json, .. } => {
let suggested_values = block_reverter.suggested_values().await;
Command::Display {
json,
operator_address,
} => {
let suggested_values = block_reverter
.suggested_values(&config, operator_address)
.await?;
if json {
println!("{}", serde_json::to_string(&suggested_values).unwrap());
println!("{}", serde_json::to_string(&suggested_values)?);
} else {
println!("Suggested values for rollback: {:#?}", suggested_values);
println!("Suggested values for reversion: {:#?}", suggested_values);
}
}
Command::SendEthTransaction {
Expand All @@ -144,11 +144,12 @@ async fn main() -> anyhow::Result<()> {
priority_fee_per_gas.map_or(default_priority_fee_per_gas, U256::from);
block_reverter
.send_ethereum_revert_transaction(
&config,
L1BatchNumber(l1_batch_number),
priority_fee_per_gas,
nonce,
)
.await
.await?;
}
Command::RollbackDB {
l1_batch_number,
Expand All @@ -158,9 +159,9 @@ async fn main() -> anyhow::Result<()> {
allow_executed_block_reversion,
} => {
if !rollback_tree && rollback_postgres {
println!("You want to rollback Postgres DB without rolling back tree.");
println!("You want to roll back Postgres DB without rolling back tree.");
println!(
"If tree is not yet rolled back to this block then the only way \
"If the tree is not yet rolled back to this L1 batch, then the only way \
to make it synced with Postgres will be to completely rebuild it."
);
println!("Are you sure? Print y/n");
Expand All @@ -173,7 +174,7 @@ async fn main() -> anyhow::Result<()> {
}

if allow_executed_block_reversion {
println!("You want to revert already executed blocks. It's impossible to restore them for the main node");
println!("You want to roll back already executed blocks. It's impossible to restore them for the main node");
println!("Make sure you are doing it ONLY for external node");
println!("Are you sure? Print y/n");

Expand All @@ -182,27 +183,34 @@ async fn main() -> anyhow::Result<()> {
if input[0] != b'y' && input[0] != b'Y' {
std::process::exit(0);
}
block_reverter.change_rollback_executed_l1_batches_allowance(
L1ExecutedBatchesRevert::Allowed,
);
block_reverter.allow_rolling_back_executed_batches();
}

let mut flags = BlockReverterFlags::empty();
if rollback_postgres {
flags |= BlockReverterFlags::POSTGRES;
block_reverter.enable_rolling_back_postgres();
let object_store_config = SnapshotsObjectStoreConfig::from_env()
.context("SnapshotsObjectStoreConfig::from_env()")?;
block_reverter.enable_rolling_back_snapshot_objects(
ObjectStoreFactory::new(object_store_config.0)
.create_store()
.await,
);
}
if rollback_tree {
flags |= BlockReverterFlags::TREE;
block_reverter.enable_rolling_back_merkle_tree(db_config.merkle_tree.path);
}
if rollback_sk_cache {
flags |= BlockReverterFlags::SK_CACHE;
block_reverter
.enable_rolling_back_state_keeper_cache(db_config.state_keeper_db_path);
}

block_reverter
.rollback_db(L1BatchNumber(l1_batch_number), flags)
.await
.roll_back(L1BatchNumber(l1_batch_number))
.await?;
}
Command::ClearFailedL1Transactions => {
block_reverter.clear_failed_l1_transactions().await?;
}
Command::ClearFailedL1Transactions => block_reverter.clear_failed_l1_transactions().await,
}
Ok(())
}
39 changes: 16 additions & 23 deletions core/bin/external_node/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use tokio::{
sync::{oneshot, watch, RwLock},
task::{self, JoinHandle},
};
use zksync_block_reverter::{BlockReverter, BlockReverterFlags, L1ExecutedBatchesRevert, NodeRole};
use zksync_block_reverter::{BlockReverter, NodeRole};
use zksync_commitment_generator::CommitmentGenerator;
use zksync_concurrency::{ctx, scope};
use zksync_config::configs::{
Expand Down Expand Up @@ -927,14 +927,13 @@ async fn run_node(
let sigint_receiver = env.setup_sigint_handler();

// Revert the storage if needed.
let reverter = BlockReverter::new(
NodeRole::External,
config.required.state_cache_path.clone(),
config.required.merkle_tree_path.clone(),
None,
connection_pool.clone(),
L1ExecutedBatchesRevert::Allowed,
);
let mut reverter = BlockReverter::new(NodeRole::External, connection_pool.clone());
// Reverting executed batches is more-or-less safe for external nodes.
let reverter = reverter
.allow_rolling_back_executed_batches()
.enable_rolling_back_postgres()
.enable_rolling_back_merkle_tree(config.required.merkle_tree_path.clone())
.enable_rolling_back_state_keeper_cache(config.required.state_cache_path.clone());

let mut reorg_detector = ReorgDetector::new(main_node_client.clone(), connection_pool.clone());
// We're checking for the reorg in the beginning because we expect that if reorg is detected during
Expand All @@ -944,31 +943,25 @@ async fn run_node(
match reorg_detector.check_consistency().await {
Ok(()) => {}
Err(reorg_detector::Error::ReorgDetected(last_correct_l1_batch)) => {
tracing::info!("Rolling back to l1 batch number {last_correct_l1_batch}");
reverter
.rollback_db(last_correct_l1_batch, BlockReverterFlags::all())
.await;
tracing::info!("Rollback successfully completed");
tracing::info!("Reverting to l1 batch number {last_correct_l1_batch}");
reverter.roll_back(last_correct_l1_batch).await?;
tracing::info!("Revert successfully completed");
}
Err(err) => return Err(err).context("reorg_detector.check_consistency()"),
}
if opt.revert_pending_l1_batch {
tracing::info!("Rolling pending L1 batch back..");
tracing::info!("Reverting pending L1 batch");
let mut connection = connection_pool.connection().await?;
let sealed_l1_batch_number = connection
.blocks_dal()
.get_sealed_l1_batch_number()
.await?
.context(
"Cannot roll back pending L1 batch since there are no L1 batches in Postgres",
)?;
.context("Cannot revert pending L1 batch since there are no L1 batches in Postgres")?;
drop(connection);

tracing::info!("Rolling back to l1 batch number {sealed_l1_batch_number}");
reverter
.rollback_db(sealed_l1_batch_number, BlockReverterFlags::all())
.await;
tracing::info!("Rollback successfully completed");
tracing::info!("Reverting to l1 batch number {sealed_l1_batch_number}");
reverter.roll_back(sealed_l1_batch_number).await?;
tracing::info!("Revert successfully completed");
}

app_health.insert_component(reorg_detector.health_check().clone())?;
Expand Down
Loading
Loading