From 08eb469aaaeb21f21da5e2a7a74917798d1287ef Mon Sep 17 00:00:00 2001
From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com>
Date: Fri, 1 Dec 2023 11:12:43 -0600
Subject: [PATCH] v1.16: Add ability to output Bank hash details (backport of #32632) (#34257)

* Add ability to output components that go into Bank hash (#32632)

When a consensus divergence occurs, the current workflow involves a
handful of manual steps to home in on the offending slot and
transaction. This process isn't overly difficult to execute; however,
it is tedious and currently involves creating and parsing logs.

This change introduces functionality to output a debug file that
contains the components that go into the bank hash. The file can be
generated in two ways:
- Via solana-validator when the node realizes it has diverged
- Via solana-ledger-tool verify by passing a flag

When a divergence occurs now, the steps to debug are:
- Grab the file from the node that diverged
- Generate a file for the same slot with ledger-tool using a known-good version
- Diff the files; they are pretty-printed JSON

(cherry picked from commit 6bbf514e78326c8cfcd4fb736a3f817e2811be40)

# Conflicts:
#	Cargo.lock
#	ledger-tool/src/args.rs
#	ledger-tool/src/main.rs
#	programs/sbf/Cargo.lock
#	runtime/Cargo.toml
#	runtime/src/accounts_db.rs
#	validator/src/main.rs

* Merge conflict

* Reorder base_working with accounts_hash to match other branches

---------

Co-authored-by: steviez
---
 Cargo.lock                            |   3 +
 core/src/replay_stage.rs              |   3 +-
 ledger-tool/src/args.rs               |   1 +
 ledger-tool/src/main.rs               |  15 +-
 programs/sbf/Cargo.lock               |   3 +
 runtime/Cargo.toml                    |   3 +
 runtime/src/accounts_db.rs            |  97 +++++++--
 runtime/src/bank.rs                   |   1 +
 runtime/src/bank/bank_hash_details.rs | 277 ++++++++++++++++++++++++++
 validator/src/main.rs                 |   1 +
 10 files changed, 390 insertions(+), 14 deletions(-)
 create mode 100644 runtime/src/bank/bank_hash_details.rs

diff --git a/Cargo.lock b/Cargo.lock
index e0544d60b98a1f..66e12aa864295b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6557,6 +6557,7 @@ version = "1.16.21"
 dependencies = [
  "arrayref",
  "assert_matches",
+ "base64 0.21.2",
  "bincode",
  "blake3",
  "bv",
@@ -6594,6 +6595,7 @@ dependencies = [
  "rustc_version 0.4.0",
  "serde",
  "serde_derive",
+ "serde_json",
  "solana-address-lookup-table-program",
  "solana-bpf-loader-program",
  "solana-bucket-map",
@@ -6611,6 +6613,7 @@ dependencies = [
  "solana-sdk",
  "solana-stake-program",
  "solana-system-program",
+ "solana-version",
  "solana-vote-program",
  "solana-zk-token-proof-program",
  "solana-zk-token-sdk",
diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs
index 62813a67485433..7fbbeda54f61f8 100644
--- a/core/src/replay_stage.rs
+++ b/core/src/replay_stage.rs
@@ -59,7 +59,7 @@ use {
     solana_rpc_client_api::response::SlotUpdate,
     solana_runtime::{
         accounts_background_service::AbsRequestSender,
-        bank::{Bank, NewBankOptions},
+        bank::{bank_hash_details, Bank, NewBankOptions},
         bank_forks::{BankForks, MAX_ROOT_DISTANCE_FOR_VOTE_ONLY},
         commitment::BlockCommitmentCache,
         prioritization_fee_cache::PrioritizationFeeCache,
@@ -1502,6 +1502,7 @@ impl ReplayStage {
                     let bank = w_bank_forks
                         .remove(*slot)
                         .expect("BankForks should not have been purged yet");
+                    let _ = bank_hash_details::write_bank_hash_details_file(&bank);
                     ((*slot, bank.bank_id()), bank)
                 })
                 .unzip()
diff --git a/ledger-tool/src/args.rs b/ledger-tool/src/args.rs
index 4da1f671b77e75..b7cbaf9c44df8d 100644
--- a/ledger-tool/src/args.rs
+++ b/ledger-tool/src/args.rs
@@ -57,6 +57,7 @@ pub fn get_accounts_db_config(
     AccountsDbConfig {
         index: Some(accounts_index_config),
+        base_working_path: Some(ledger_path.to_path_buf()),
         accounts_hash_cache_path: Some(
             ledger_path.join(AccountsDb::DEFAULT_ACCOUNTS_HASH_CACHE_DIR),
         ),
diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs
index 1b7eba75d6c2a0..5bd003b3030aa7 100644
--- a/ledger-tool/src/main.rs
+++ b/ledger-tool/src/main.rs
@@ -47,7 +47,7 @@ use {
         accounts::Accounts,
         accounts_db::CalcAccountsHashDataSource,
         accounts_index::ScanConfig,
-        bank::{Bank, RewardCalculationEvent, TotalAccountsStats},
+        bank::{bank_hash_details, Bank, RewardCalculationEvent, TotalAccountsStats},
         bank_forks::BankForks,
         cost_model::CostModel,
         cost_tracker::CostTracker,
@@ -1629,6 +1629,14 @@ fn main() {
                     .takes_value(false)
                     .help("After verifying the ledger, print some information about the account stores"),
             )
+            .arg(
+                Arg::with_name("write_bank_file")
+                    .long("write-bank-file")
+                    .takes_value(false)
+                    .help("After verifying the ledger, write a file that contains the information \
+                        that went into computing the completed bank's bank hash. The file will be \
+                        written within <LEDGER_DIR>/bank_hash_details/"),
+            )
         ).subcommand(
             SubCommand::with_name("graph")
             .about("Create a Graphviz rendering of the ledger")
@@ -2592,6 +2600,7 @@ fn main() {
                 ..ProcessOptions::default()
             };
             let print_accounts_stats = arg_matches.is_present("print_accounts_stats");
+            let write_bank_file = arg_matches.is_present("write_bank_file");
             let genesis_config = open_genesis_config_by(&ledger_path, arg_matches);
             info!("genesis hash: {}", genesis_config.hash());
@@ -2617,6 +2626,10 @@ fn main() {
                 let working_bank = bank_forks.read().unwrap().working_bank();
                 working_bank.print_accounts_stats();
             }
+            if write_bank_file {
+                let working_bank = bank_forks.read().unwrap().working_bank();
+                let _ = bank_hash_details::write_bank_hash_details_file(&working_bank);
+            }
             exit_signal.store(true, Ordering::Relaxed);
             system_monitor_service.join().unwrap();
         }
diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock
index 61ea7292e1fbd6..f19c89132c23d4 100644
--- a/programs/sbf/Cargo.lock
+++ b/programs/sbf/Cargo.lock
@@ -5416,6 +5416,7 @@ name = "solana-runtime"
 version = "1.16.21"
 dependencies = [
  "arrayref",
+ "base64 0.21.2",
  "bincode",
  "blake3",
  "bv",
@@ -5449,6 +5450,7 @@ dependencies = [
  "rustc_version",
  "serde",
  "serde_derive",
+ "serde_json",
  "solana-address-lookup-table-program",
  "solana-bpf-loader-program",
  "solana-bucket-map",
@@ -5465,6 +5467,7 @@ dependencies = [
  "solana-sdk",
  "solana-stake-program",
  "solana-system-program",
+ "solana-version",
  "solana-vote-program",
  "solana-zk-token-proof-program",
  "solana-zk-token-sdk",
diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml
index de1e86cf053332..27052d5719bb4b 100644
--- a/runtime/Cargo.toml
+++ b/runtime/Cargo.toml
@@ -11,6 +11,7 @@ edition = { workspace = true }
 [dependencies]
 arrayref = { workspace = true }
+base64 = { workspace = true }
 bincode = { workspace = true }
 blake3 = { workspace = true }
 bv = { workspace = true, features = ["serde"] }
@@ -43,6 +44,7 @@ rayon = { workspace = true }
 regex = { workspace = true }
 serde = { workspace = true, features = ["rc"] }
 serde_derive = { workspace = true }
+serde_json = { workspace = true }
 solana-address-lookup-table-program = { workspace = true }
 solana-bpf-loader-program = { workspace = true }
 solana-bucket-map = { workspace = true }
@@ -59,6 +61,7 @@ solana-rayon-threadlimit = { workspace = true }
 solana-sdk = { workspace = true }
 solana-stake-program = { workspace = true }
 solana-system-program = { workspace = true }
+solana-version = { workspace = true }
 solana-vote-program = { workspace = true }
 solana-zk-token-proof-program = { workspace = true }
 solana-zk-token-sdk = { workspace = true }
diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index 635e5c97bc34a4..464a5f191e4172 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -469,6 +469,7 @@ pub(crate) struct ShrinkCollect<'a, T: ShrinkCollectRefs<'a>> {
 pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig {
     index: Some(ACCOUNTS_INDEX_CONFIG_FOR_TESTING),
+    base_working_path: None,
     accounts_hash_cache_path: None,
     filler_accounts_config: FillerAccountsConfig::const_default(),
     write_cache_limit_bytes: None,
@@ -480,6 +481,7 @@ pub const ACCOUNTS_DB_CONFIG_FOR_TESTING: AccountsDbConfig = AccountsDbConfig {
     index: Some(ACCOUNTS_INDEX_CONFIG_FOR_BENCHMARKS),
+    base_working_path: None,
     accounts_hash_cache_path: None,
     filler_accounts_config: FillerAccountsConfig::const_default(),
     write_cache_limit_bytes: None,
@@ -539,6 +541,8 @@ const ANCIENT_APPEND_VEC_DEFAULT_OFFSET: Option<i64> = Some(-10_000);
 #[derive(Debug, Default, Clone)]
 pub struct AccountsDbConfig {
     pub index: Option<AccountsIndexConfig>,
+    /// Base directory for various necessary files
+    pub base_working_path: Option<PathBuf>,
     pub accounts_hash_cache_path: Option<PathBuf>,
     pub filler_accounts_config: FillerAccountsConfig,
     pub write_cache_limit_bytes: Option<u64>,
@@ -1395,12 +1399,14 @@ pub struct AccountsDb {
     /// Set of storage paths to pick from
     pub(crate) paths: Vec<PathBuf>,
-    accounts_hash_cache_path: PathBuf,
-
+    /// Base directory for various necessary files
+    base_working_path: PathBuf,
     // used by tests
     // holds this until we are dropped
     #[allow(dead_code)]
-    temp_accounts_hash_cache_path: Option<TempDir>,
+    base_working_temp_dir: Option<TempDir>,
+    /// Directory for account hash calculations, within base_working_path
+    accounts_hash_cache_path: PathBuf,

     pub shrink_paths: RwLock<Option<Vec<PathBuf>>>,
@@ -2347,29 +2353,47 @@ impl<'a> AppendVecScan for ScanState<'a> {
     }
 }

+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct PubkeyHashAccount {
+    pub pubkey: Pubkey,
+    pub hash: Hash,
+    pub account: AccountSharedData,
+}
+
 impl AccountsDb {
     pub const DEFAULT_ACCOUNTS_HASH_CACHE_DIR: &str = "accounts_hash_cache";

     pub fn default_for_tests() -> Self {
-        Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None)
+        Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None, None)
     }

     fn default_with_accounts_index(
         accounts_index: AccountInfoAccountsIndex,
+        base_working_path: Option<PathBuf>,
         accounts_hash_cache_path: Option<PathBuf>,
     ) -> Self {
         let num_threads = get_thread_count();
         const MAX_READ_ONLY_CACHE_DATA_SIZE: usize = 400_000_000; // 400M bytes

-        let (accounts_hash_cache_path, temp_accounts_hash_cache_path) =
-            if let Some(accounts_hash_cache_path) = accounts_hash_cache_path {
-                (accounts_hash_cache_path, None)
+        let (base_working_path, base_working_temp_dir) =
+            if let Some(base_working_path) = base_working_path {
+                (base_working_path, None)
             } else {
-                let temp_dir = TempDir::new().expect("new tempdir");
-                let cache_path = temp_dir.path().to_path_buf();
-                (cache_path, Some(temp_dir))
+                let temp_base_working_dir = TempDir::new().unwrap();
+                let base_working_path = temp_base_working_dir.path().to_path_buf();
+                (base_working_path, Some(temp_base_working_dir))
             };
+
+        let accounts_hash_cache_path = accounts_hash_cache_path.unwrap_or_else(|| {
+            let accounts_hash_cache_path =
+                base_working_path.join(Self::DEFAULT_ACCOUNTS_HASH_CACHE_DIR);
+            if !accounts_hash_cache_path.exists() {
+                std::fs::create_dir(&accounts_hash_cache_path)
+                    .expect("create accounts hash cache dir");
+            }
+            accounts_hash_cache_path
+        });
+
         let mut bank_hash_stats = HashMap::new();
         bank_hash_stats.insert(0, BankHashStats::default());
@@ -2398,8 +2422,9 @@ impl AccountsDb {
             write_cache_limit_bytes: None,
             write_version: AtomicU64::new(0),
             paths: vec![],
+            base_working_path,
+            base_working_temp_dir,
             accounts_hash_cache_path,
-            temp_accounts_hash_cache_path,
             shrink_paths: RwLock::new(None),
             temp_paths: None,
             file_size: DEFAULT_FILE_SIZE,
@@ -2477,6 +2502,9 @@ impl AccountsDb {
             accounts_db_config.as_mut().and_then(|x| x.index.take()),
             exit,
         );
+        let base_working_path = accounts_db_config
+            .as_ref()
+            .and_then(|config| config.base_working_path.clone());
         let accounts_hash_cache_path = accounts_db_config
             .as_ref()
             .and_then(|config| config.accounts_hash_cache_path.clone());
@@ -2534,7 +2562,11 @@ impl AccountsDb {
                 .and_then(|x| x.write_cache_limit_bytes),
             partitioned_epoch_rewards_config,
             exhaustively_verify_refcounts,
-            ..Self::default_with_accounts_index(accounts_index, accounts_hash_cache_path)
+            ..Self::default_with_accounts_index(
+                accounts_index,
+                base_working_path,
+                accounts_hash_cache_path,
+            )
         };
         if paths_is_empty {
             // Create a temporary set of accounts directories, used primarily
@@ -2581,6 +2613,11 @@ impl AccountsDb {
         self.file_size
     }

+    /// Get the base working directory
+    pub fn get_base_working_path(&self) -> PathBuf {
+        self.base_working_path.clone()
+    }
+
     pub fn new_single_for_tests() -> Self {
         AccountsDb::new_for_tests(Vec::new(), &ClusterType::Development)
     }
@@ -7775,6 +7812,42 @@ impl AccountsDb {
         (hashes, scan.as_us(), accumulate)
     }

+    /// Return all of the accounts for a given slot
+    pub fn get_pubkey_hash_account_for_slot(&self, slot: Slot) -> Vec<PubkeyHashAccount> {
+        type ScanResult =
+            ScanStorageResult<PubkeyHashAccount, DashMap<Pubkey, (Hash, AccountSharedData)>>;
+        let scan_result: ScanResult = self.scan_account_storage(
+            slot,
+            |loaded_account: LoadedAccount| {
+                // Cache only has one version per key, don't need to worry about versioning
+                Some(PubkeyHashAccount {
+                    pubkey: *loaded_account.pubkey(),
+                    hash: loaded_account.loaded_hash(),
+                    account: loaded_account.take_account(),
+                })
+            },
+            |accum: &DashMap<Pubkey, (Hash, AccountSharedData)>, loaded_account: LoadedAccount| {
+                // Storage may have duplicates so only keep the latest version for each key
+                accum.insert(
+                    *loaded_account.pubkey(),
+                    (loaded_account.loaded_hash(), loaded_account.take_account()),
+                );
+            },
+        );
+
+        match scan_result {
+            ScanStorageResult::Cached(cached_result) => cached_result,
+            ScanStorageResult::Stored(stored_result) => stored_result
+                .into_iter()
+                .map(|(pubkey, (hash, account))| PubkeyHashAccount {
+                    pubkey,
+                    hash,
+                    account,
+                })
+                .collect(),
+        }
+    }
+
     /// Calculate accounts delta hash for `slot`
     ///
     /// As part of calculating the accounts delta hash, get a list of accounts modified this slot
diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs
index eb31085c3a1dd6..d237113474776d 100644
--- a/runtime/src/bank.rs
+++ b/runtime/src/bank.rs
@@ -200,6 +200,7 @@ struct VerifyAccountsHashConfig {
 }

 mod address_lookup_table;
+pub mod bank_hash_details;
 mod builtin_programs;
 mod metrics;
 mod sysvar_cache;
diff --git a/runtime/src/bank/bank_hash_details.rs b/runtime/src/bank/bank_hash_details.rs
new file mode 100644
index 00000000000000..164e4c91f43442
--- /dev/null
+++ b/runtime/src/bank/bank_hash_details.rs
@@ -0,0 +1,277 @@
+//! Container to capture information relevant to computing a bank hash
+
+use {
+    super::Bank,
+    crate::{accounts_db::PubkeyHashAccount, accounts_hash::AccountsDeltaHash},
+    base64::{prelude::BASE64_STANDARD, Engine},
+    log::*,
+    serde::{
+        de::{self, Deserialize, Deserializer},
+        ser::{Serialize, SerializeSeq, Serializer},
+    },
+    solana_sdk::{
+        account::{Account, AccountSharedData, ReadableAccount},
+        clock::{Epoch, Slot},
+        hash::Hash,
+        pubkey::Pubkey,
+    },
+    std::str::FromStr,
+};
+
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub(crate) struct BankHashDetails {
+    /// client version
+    pub version: String,
+    pub account_data_encoding: String,
+    pub slot: Slot,
+    pub bank_hash: String,
+    pub parent_bank_hash: String,
+    pub accounts_delta_hash: String,
+    pub signature_count: u64,
+    pub last_blockhash: String,
+    pub accounts: BankHashAccounts,
+}
+
+impl BankHashDetails {
+    pub fn new(
+        slot: Slot,
+        bank_hash: Hash,
+        parent_bank_hash: Hash,
+        accounts_delta_hash: Hash,
+        signature_count: u64,
+        last_blockhash: Hash,
+        accounts: BankHashAccounts,
+    ) -> Self {
+        Self {
+            version: solana_version::version!().to_string(),
+            account_data_encoding: "base64".to_string(),
+            slot,
+            bank_hash: bank_hash.to_string(),
+            parent_bank_hash: parent_bank_hash.to_string(),
+            accounts_delta_hash: accounts_delta_hash.to_string(),
+            signature_count,
+            last_blockhash: last_blockhash.to_string(),
+            accounts,
+        }
+    }
+}
+
+impl TryFrom<&Bank> for BankHashDetails {
+    type Error = String;
+
+    fn try_from(bank: &Bank) -> Result<Self, Self::Error> {
+        let slot = bank.slot();
+        if !bank.is_frozen() {
+            return Err(format!(
+                "Bank {slot} must be frozen in order to get bank hash details"
+            ));
+        }
+
+        // This bank is frozen; as a result, we know that the state has been
+        // hashed which means the delta hash is Some(). So, .unwrap() is safe
+        let AccountsDeltaHash(accounts_delta_hash) = bank
+            .rc
+            .accounts
+            .accounts_db
+            .get_accounts_delta_hash(slot)
+            .unwrap();
+        let mut accounts = bank
+            .rc
+            .accounts
+            .accounts_db
+            .get_pubkey_hash_account_for_slot(slot);
+        // get_pubkey_hash_account_for_slot() returns an arbitrary ordering;
+        // sort by pubkey to match the ordering used for accounts delta hash
+        accounts.sort_by_key(|account| account.pubkey);
+
+        Ok(Self::new(
+            slot,
+            bank.hash(),
+            bank.parent_hash(),
+            accounts_delta_hash,
+            bank.signature_count(),
+            bank.last_blockhash(),
+            BankHashAccounts { accounts },
+        ))
+    }
+}
+
+// Wrap the Vec<...> so we can implement custom Serialize/Deserialize traits on the wrapper type
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct BankHashAccounts {
+    pub accounts: Vec<PubkeyHashAccount>,
+}
+
+#[derive(Deserialize, Serialize)]
+/// Used as an intermediate for serializing and deserializing account fields
+/// into a human readable format.
+struct SerdeAccount {
+    pubkey: String,
+    hash: String,
+    owner: String,
+    lamports: u64,
+    rent_epoch: Epoch,
+    executable: bool,
+    data: String,
+}
+
+impl From<&PubkeyHashAccount> for SerdeAccount {
+    fn from(pubkey_hash_account: &PubkeyHashAccount) -> Self {
+        let PubkeyHashAccount {
+            pubkey,
+            hash,
+            account,
+        } = pubkey_hash_account;
+        Self {
+            pubkey: pubkey.to_string(),
+            hash: hash.to_string(),
+            owner: account.owner().to_string(),
+            lamports: account.lamports(),
+            rent_epoch: account.rent_epoch(),
+            executable: account.executable(),
+            data: BASE64_STANDARD.encode(account.data()),
+        }
+    }
+}
+
+impl TryFrom<SerdeAccount> for PubkeyHashAccount {
+    type Error = String;
+
+    fn try_from(temp_account: SerdeAccount) -> Result<Self, Self::Error> {
+        let pubkey = Pubkey::from_str(&temp_account.pubkey).map_err(|err| err.to_string())?;
+        let hash = Hash::from_str(&temp_account.hash).map_err(|err| err.to_string())?;
+
+        let account = AccountSharedData::from(Account {
+            lamports: temp_account.lamports,
+            data: BASE64_STANDARD
+                .decode(temp_account.data)
+                .map_err(|err| err.to_string())?,
+            owner: Pubkey::from_str(&temp_account.owner).map_err(|err| err.to_string())?,
+            executable: temp_account.executable,
+            rent_epoch: temp_account.rent_epoch,
+        });
+
+        Ok(Self {
+            pubkey,
+            hash,
+            account,
+        })
+    }
+}
+
+impl Serialize for BankHashAccounts {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut seq = serializer.serialize_seq(Some(self.accounts.len()))?;
+        for account in self.accounts.iter() {
+            let temp_account = SerdeAccount::from(account);
+            seq.serialize_element(&temp_account)?;
+        }
+        seq.end()
+    }
+}
+
+impl<'de> Deserialize<'de> for BankHashAccounts {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let temp_accounts: Vec<SerdeAccount> = Deserialize::deserialize(deserializer)?;
+        let pubkey_hash_accounts: Result<Vec<PubkeyHashAccount>, _> = temp_accounts
+            .into_iter()
+            .map(PubkeyHashAccount::try_from)
+            .collect();
+        let pubkey_hash_accounts = pubkey_hash_accounts.map_err(de::Error::custom)?;
+        Ok(BankHashAccounts {
+            accounts: pubkey_hash_accounts,
+        })
+    }
+}
+
+/// Output the components that comprise bank hash
+pub fn write_bank_hash_details_file(bank: &Bank) -> std::result::Result<(), String> {
+    let details = BankHashDetails::try_from(bank)?;
+
+    let slot = details.slot;
+    let hash = &details.bank_hash;
+    let file_name = format!("{slot}-{hash}.json");
+    let parent_dir = bank
+        .rc
+        .accounts
+        .accounts_db
+        .get_base_working_path()
+        .join("bank_hash_details");
+    let path = parent_dir.join(file_name);
+    // A file with the same name implies the same hash for this slot. Skip
+    // rewriting a duplicate file in this scenario
+    if !path.exists() {
+        info!("writing details of bank {} to {}", slot, path.display());
+
+        // std::fs::write may fail (depending on platform) if the full directory
+        // path does not exist. So, call std::fs::create_dir_all first.
+ // https://doc.rust-lang.org/std/fs/fn.write.html + _ = std::fs::create_dir_all(parent_dir); + let file = std::fs::File::create(&path).map_err(|err| { + format!( + "Unable to create bank hash file at {}: {err}", + path.display() + ) + })?; + serde_json::to_writer_pretty(file, &details) + .map_err(|err| format!("Unable to write bank hash file contents: {err}"))?; + } + Ok(()) +} + +#[cfg(test)] +pub mod tests { + use super::*; + + #[test] + fn test_serde_bank_hash_details() { + use solana_sdk::hash::hash; + + let slot = 123_456_789; + let signature_count = 314; + + let account = AccountSharedData::from(Account { + lamports: 123_456_789, + data: vec![0, 9, 1, 8, 2, 7, 3, 6, 4, 5], + owner: Pubkey::new_unique(), + executable: true, + rent_epoch: 123, + }); + let account_pubkey = Pubkey::new_unique(); + let account_hash = hash("account".as_bytes()); + let accounts = BankHashAccounts { + accounts: vec![PubkeyHashAccount { + pubkey: account_pubkey, + hash: account_hash, + account, + }], + }; + + let bank_hash = hash("bank".as_bytes()); + let parent_bank_hash = hash("parent_bank".as_bytes()); + let accounts_delta_hash = hash("accounts_delta".as_bytes()); + let last_blockhash = hash("last_blockhash".as_bytes()); + + let bank_hash_details = BankHashDetails::new( + slot, + bank_hash, + parent_bank_hash, + accounts_delta_hash, + signature_count, + last_blockhash, + accounts, + ); + + let serialized_bytes = serde_json::to_vec(&bank_hash_details).unwrap(); + let deserialized_bank_hash_details: BankHashDetails = + serde_json::from_slice(&serialized_bytes).unwrap(); + + assert_eq!(bank_hash_details, deserialized_bank_hash_details); + } +} diff --git a/validator/src/main.rs b/validator/src/main.rs index 4e699b382f2734..f888adae283f13 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1181,6 +1181,7 @@ pub fn main() { let accounts_db_config = AccountsDbConfig { index: Some(accounts_index_config), + base_working_path: Some(ledger_path.clone()), accounts_hash_cache_path: Some(accounts_hash_cache_path), filler_accounts_config, write_cache_limit_bytes: value_t!(matches, "accounts_db_cache_limit_mb", u64)
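
The debugging workflow described in the commit message (grab the details file that the diverged node wrote, regenerate the same slot with `solana-ledger-tool verify --write-bank-file` on a known-good version, then diff the two pretty-printed JSON files found under the ledger's bank_hash_details/ directory) can also be scripted. Below is a minimal, illustrative Rust sketch, not part of this patch, that compares two such files and prints mismatching top-level hashes and per-account hashes. It assumes serde_json is available as a dependency, uses placeholder file paths, and relies only on the JSON field names produced by BankHashDetails and SerdeAccount above (bank_hash, accounts_delta_hash, last_blockhash, signature_count, accounts, pubkey, hash).

// Illustrative only: compare two bank hash details files written to
// <ledger>/bank_hash_details/<slot>-<bank_hash>.json. Uses serde_json::Value
// so it does not depend on the crate-private BankHashDetails type.
use std::{collections::HashMap, fs::File};

fn load(path: &str) -> serde_json::Value {
    serde_json::from_reader(File::open(path).expect("open bank hash details file"))
        .expect("parse bank hash details file")
}

// Map each account pubkey to its recorded hash, per the SerdeAccount JSON layout.
fn account_hashes(details: &serde_json::Value) -> HashMap<String, String> {
    let accounts = details["accounts"].as_array().cloned().unwrap_or_default();
    accounts
        .iter()
        .map(|acct| {
            (
                acct["pubkey"].as_str().unwrap_or_default().to_string(),
                acct["hash"].as_str().unwrap_or_default().to_string(),
            )
        })
        .collect()
}

fn main() {
    // Placeholder paths: one file from the diverged node, one regenerated with
    // `solana-ledger-tool verify --write-bank-file` on a known-good version.
    let good = load("good/<slot>-<bank_hash>.json");
    let bad = load("bad/<slot>-<bank_hash>.json");

    // Compare the summary fields first.
    for field in ["bank_hash", "accounts_delta_hash", "last_blockhash", "signature_count"] {
        if good[field] != bad[field] {
            println!("{field}: {} vs {}", good[field], bad[field]);
        }
    }

    // Report per-account differences; a HashMap lookup keeps the comparison
    // independent of ordering, although both files are sorted by pubkey.
    let good_accounts = account_hashes(&good);
    for (pubkey, bad_hash) in account_hashes(&bad) {
        match good_accounts.get(&pubkey) {
            Some(good_hash) if *good_hash == bad_hash => {}
            Some(good_hash) => println!("{pubkey}: {good_hash} vs {bad_hash}"),
            None => println!("{pubkey}: only present in the diverged bank"),
        }
    }
}

In most cases a plain textual diff of the two files is sufficient, as the commit message suggests, since accounts are sorted by pubkey before serialization; the sketch only shows how the per-account entries can be matched up programmatically.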