Skip to content

Commit

Permalink
[Forge] Add working_dir param to support running node on checkpoint d…
Browse files Browse the repository at this point in the history
…ir, so that the existing data on disk is preserved.
  • Loading branch information
grao1991 committed Dec 6, 2022
1 parent c8a5858 commit 7b0074a
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 15 deletions.
52 changes: 48 additions & 4 deletions aptos-node/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ mod log_build_information;
use anyhow::{anyhow, Context};
use aptos_api::bootstrap as bootstrap_api;
use aptos_build_info::build_information;
use aptos_config::config::StateSyncConfig;
use aptos_config::{
config::{
AptosDataClientConfig, BaseConfig, NetworkConfig, NodeConfig, PersistableConfig,
StorageServiceConfig,
RocksdbConfigs, StateSyncConfig, StorageServiceConfig, BUFFERED_STATE_TARGET_ITEMS,
DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD, NO_OP_STORAGE_PRUNER_CONFIG,
},
network_id::NetworkId,
utils::get_genesis_txn,
Expand All @@ -26,6 +26,7 @@ use aptos_types::{
account_config::CORE_CODE_ADDRESS, account_view::AccountView, chain_id::ChainId,
on_chain_config::ON_CHAIN_CONFIG_REGISTRY, waypoint::Waypoint,
};

use aptos_vm::AptosVM;
use aptosdb::AptosDB;
use backup_service::start_backup_service;
Expand Down Expand Up @@ -53,8 +54,9 @@ use state_sync_driver::{
use std::{
boxed::Box,
collections::{HashMap, HashSet},
fs,
io::Write,
path::PathBuf,
path::{Path, PathBuf},
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
Expand Down Expand Up @@ -569,8 +571,43 @@ fn bootstrap_indexer(
Ok(None)
}

/// Checkpoints the node's on-disk databases into `working_dir` and repoints `node_config` at it.
///
/// The storage directory is read from `node_config` both before and after calling
/// `set_data_dir` with `working_dir`: the first value is the checkpoint source, the second is
/// the checkpoint destination. Checkpoints are then taken, in order, of AptosDB, ConsensusDB
/// and the state sync metadata DB.
///
/// # Panics
///
/// Panics if the source and destination storage directories are equal, if the destination
/// directory cannot be created, or if opening or checkpointing any of the databases fails.
fn create_checkpoint_and_change_working_dir(
    node_config: &mut NodeConfig,
    working_dir: impl AsRef<Path>,
) {
    // Remember where the data currently lives before the config is redirected.
    let source_dir = node_config.storage.dir();
    let new_data_dir = working_dir.as_ref().to_path_buf();
    node_config.set_data_dir(new_data_dir);
    let checkpoint_dir = node_config.storage.dir();

    assert!(source_dir != checkpoint_dir);

    // Create rocksdb checkpoint.
    fs::create_dir_all(&checkpoint_dir).unwrap();

    // The AptosDB handle is scoped so it is released before the remaining checkpoints are made.
    {
        let aptos_db = AptosDB::open(
            &source_dir,
            false,                       /* readonly */
            NO_OP_STORAGE_PRUNER_CONFIG, /* pruner */
            RocksdbConfigs::default(),
            false,
            BUFFERED_STATE_TARGET_ITEMS,
            DEFAULT_MAX_NUM_NODES_PER_LRU_CACHE_SHARD,
        )
        .expect("AptosDB open failure.");
        aptos_db
            .create_checkpoint(&checkpoint_dir)
            .expect("AptosDB checkpoint creation failed.");
    }

    let consensus_checkpoint = consensus::create_checkpoint(&source_dir, &checkpoint_dir);
    consensus_checkpoint.expect("ConsensusDB checkpoint creation failed.");

    state_sync_driver::metadata_storage::PersistentMetadataStorage::new(&source_dir)
        .create_checkpoint(&checkpoint_dir)
        .expect("StateSyncDB checkpoint creation failed.");
}

pub fn setup_environment(
node_config: NodeConfig,
mut node_config: NodeConfig,
remote_log_rx: Option<mpsc::Receiver<TelemetryLog>>,
logger_filter_update_job: Option<LoggerFilterUpdater>,
) -> anyhow::Result<AptosHandle> {
Expand All @@ -580,6 +617,13 @@ pub fn setup_environment(
inspection_service::inspection_service::start_inspection_service(node_config_clone)
});

// If working_dir is provided, we make RocksDB checkpoints of consensus_db, state_sync_db,
// ledger_db and state_merkle_db in the checkpoint path, and run the node on the new path,
// so that the existing data won't change. For now this is a test-only feature.
if let Some(working_dir) = node_config.base.working_dir.clone() {
create_checkpoint_and_change_working_dir(&mut node_config, working_dir);
}

// Open the database
let mut instant = Instant::now();
let (aptos_db, db_rw) = DbReaderWriter::wrap(
Expand Down
9 changes: 9 additions & 0 deletions config/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pub struct NodeConfig {
/// Base settings of a node: data locations, role and waypoint.
///
/// Missing fields are filled from `Default` (`serde(default)`), and unknown fields are rejected
/// during deserialization (`deny_unknown_fields`).
#[serde(default, deny_unknown_fields)]
pub struct BaseConfig {
    /// Base data directory of the node; defaults to `/opt/aptos/data`.
    pub data_dir: PathBuf,
    /// Optional working directory; `None` by default. When set, node startup checkpoints the
    /// existing databases into it and runs on that copy, leaving the original data untouched.
    pub working_dir: Option<PathBuf>,
    /// Role of the node; defaults to `RoleType::Validator`.
    pub role: RoleType,
    /// Waypoint configuration; defaults to `WaypointConfig::None`.
    pub waypoint: WaypointConfig,
}
Expand All @@ -109,6 +110,7 @@ impl Default for BaseConfig {
fn default() -> BaseConfig {
BaseConfig {
data_dir: PathBuf::from("/opt/aptos/data"),
working_dir: None,
role: RoleType::Validator,
waypoint: WaypointConfig::None,
}
Expand Down Expand Up @@ -266,6 +268,13 @@ impl NodeConfig {
&self.base.data_dir
}

/// Returns the configured working directory, falling back to the base data directory when no
/// working directory is set.
pub fn working_dir(&self) -> &Path {
    self.base
        .working_dir
        .as_deref()
        .unwrap_or(self.base.data_dir.as_path())
}

pub fn set_data_dir(&mut self, data_dir: PathBuf) {
self.base.data_dir = data_dir.clone();
self.consensus.set_data_dir(data_dir.clone());
Expand Down
16 changes: 16 additions & 0 deletions consensus/src/consensusdb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@ use std::{collections::HashMap, iter::Iterator, path::Path, time::Instant};
/// The name of the consensus db file
pub const CONSENSUS_DB_NAME: &str = "consensus_db";

/// Creates a new physical checkpoint of the ConsensusDB found under `db_path`.
///
/// The checkpoint is written to `checkpoint_path` joined with `CONSENSUS_DB_NAME`. Whatever
/// already exists at that location is removed first; errors from that removal are ignored.
/// Returns an error if the underlying checkpoint creation fails.
pub fn create_checkpoint<P: AsRef<Path> + Clone>(db_path: P, checkpoint_path: P) -> Result<()> {
    let timer = Instant::now();
    let target_dir = checkpoint_path.as_ref().join(CONSENSUS_DB_NAME);

    // Best-effort cleanup of a previous checkpoint; a failure here (e.g. nothing to remove) is
    // not treated as an error.
    let _ = std::fs::remove_dir_all(&target_dir);

    // Scope the opened DB so it is closed again before the completion log line is emitted.
    {
        let consensus_db = ConsensusDB::new(db_path);
        consensus_db.db.create_checkpoint(&target_dir)?;
    }

    info!(
        path = target_dir,
        time_ms = %timer.elapsed().as_millis(),
        "Made ConsensusDB checkpoint."
    );
    Ok(())
}

pub struct ConsensusDB {
db: DB,
}
Expand Down
2 changes: 2 additions & 0 deletions consensus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ pub mod network_interface;
/// Required by the smoke tests
pub use consensusdb::CONSENSUS_DB_NAME;

pub use consensusdb::create_checkpoint;

#[cfg(feature = "fuzzing")]
pub use round_manager::round_manager_fuzzing;

Expand Down
2 changes: 1 addition & 1 deletion docker/compose/aptos-node/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
namespace: ~
initial_safety_rules_config:
from_file:
Expand Down
14 changes: 14 additions & 0 deletions state-sync/state-sync-v2/state-sync-driver/src/metadata_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,20 @@ impl PersistentMetadataStorage {
))
})
}

/// Creates a new physical checkpoint of the state sync database under `path`.
///
/// The checkpoint is written to `path` joined with `STATE_SYNC_DB_NAME`. Whatever already
/// exists at that location is removed first; errors from that removal are ignored. Returns an
/// error if the underlying checkpoint creation fails.
pub fn create_checkpoint<P: AsRef<Path>>(&self, path: P) -> Result<()> {
    let timer = Instant::now();
    let target_dir = path.as_ref().join(STATE_SYNC_DB_NAME);

    // Best-effort cleanup of a previous checkpoint; a failure here (e.g. nothing to remove) is
    // not treated as an error.
    let _ = std::fs::remove_dir_all(&target_dir);

    self.database.create_checkpoint(&target_dir)?;
    info!(
        path = target_dir,
        time_ms = %timer.elapsed().as_millis(),
        "Made StateSyncDB checkpoint."
    );
    Ok(())
}
}

impl MetadataStorageInterface for PersistentMetadataStorage {
Expand Down
20 changes: 12 additions & 8 deletions storage/aptosdb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ use aptos_crypto::hash::HashValue;
use aptos_infallible::Mutex;
use aptos_logger::prelude::*;
use aptos_rocksdb_options::gen_rocksdb_options;
use aptos_types::proof::TransactionAccumulatorSummary;
use aptos_types::state_store::state_storage_usage::StateStorageUsage;
use aptos_types::{
account_address::AccountAddress,
account_config::{new_block_event_key, NewBlockEvent},
Expand All @@ -77,12 +75,13 @@ use aptos_types::{
ledger_info::LedgerInfoWithSignatures,
proof::{
accumulator::InMemoryAccumulator, AccumulatorConsistencyProof, SparseMerkleProofExt,
TransactionInfoListWithProof,
TransactionAccumulatorSummary, TransactionInfoListWithProof,
},
state_proof::StateProof,
state_store::{
state_key::StateKey,
state_key_prefix::StateKeyPrefix,
state_storage_usage::StateStorageUsage,
state_value::{StateValue, StateValueChunkWithProof},
table::{TableHandle, TableInfo},
},
Expand All @@ -108,12 +107,15 @@ use std::{
time::{Duration, Instant},
};

use crate::pruner::{
ledger_pruner_manager::LedgerPrunerManager, ledger_store::ledger_store_pruner::LedgerPruner,
state_pruner_manager::StatePrunerManager, state_store::StateMerklePruner,
use crate::{
pruner::{
ledger_pruner_manager::LedgerPrunerManager,
ledger_store::ledger_store_pruner::LedgerPruner, state_pruner_manager::StatePrunerManager,
state_store::StateMerklePruner,
},
stale_node_index::StaleNodeIndexSchema,
stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema,
};
use crate::stale_node_index::StaleNodeIndexSchema;
use crate::stale_node_index_cross_epoch::StaleNodeIndexCrossEpochSchema;
use storage_interface::{
state_delta::StateDelta, state_view::DbStateView, DbReader, DbWriter, ExecutedTrees, Order,
StateSnapshotReceiver, MAX_REQUEST_LIMIT,
Expand Down Expand Up @@ -655,6 +657,8 @@ impl AptosDB {
let start = Instant::now();
let ledger_db_path = path.as_ref().join(LEDGER_DB_NAME);
let state_merkle_db_path = path.as_ref().join(STATE_MERKLE_DB_NAME);
std::fs::remove_dir_all(&ledger_db_path).unwrap_or(());
std::fs::remove_dir_all(&state_merkle_db_path).unwrap_or(());
self.ledger_db.create_checkpoint(&ledger_db_path)?;
self.state_merkle_db
.create_checkpoint(&state_merkle_db_path)?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ consensus:
type: "local"
backend:
type: "on_disk_storage"
path: /opt/aptos/data/secure-data.json
path: secure-data.json
namespace: ~
initial_safety_rules_config:
from_file:
Expand Down
2 changes: 1 addition & 1 deletion testsuite/forge/src/backend/local/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ impl Node for LocalNode {
let node_config = self.config();
let ledger_db_path = node_config.storage.dir().join(LEDGER_DB_NAME);
let state_db_path = node_config.storage.dir().join(STATE_MERKLE_DB_NAME);
let secure_storage_path = node_config.base.data_dir.join("secure_storage.json");
let secure_storage_path = node_config.working_dir().join("secure_storage.json");
let state_sync_db_path = node_config.storage.dir().join(STATE_SYNC_DB_NAME);

debug!(
Expand Down

0 comments on commit 7b0074a

Please sign in to comment.