From a1801b4ac50e350798346c1b2182ab4e1d6406d0 Mon Sep 17 00:00:00 2001 From: brooks Date: Wed, 6 Sep 2023 10:14:36 -0400 Subject: [PATCH] Shares accounts hash cache data between full and incremental --- accounts-db/src/accounts_db.rs | 27 +++++++++++++-------------- accounts-db/src/cache_hash_data.rs | 17 ++++++++++------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 85c32eb057342f..99032350bb6765 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -1487,8 +1487,7 @@ pub struct AccountsDb { #[allow(dead_code)] base_working_temp_dir: Option<TempDir>, - full_accounts_hash_cache_path: PathBuf, - incremental_accounts_hash_cache_path: PathBuf, + accounts_hash_cache_path: PathBuf, transient_accounts_hash_cache_path: PathBuf, pub shrink_paths: RwLock<Option<Vec<PathBuf>>>, @@ -2487,9 +2486,8 @@ impl AccountsDb { paths: vec![], base_working_path, base_working_temp_dir, - full_accounts_hash_cache_path: accounts_hash_cache_path.join("full"), - incremental_accounts_hash_cache_path: accounts_hash_cache_path.join("incremental"), transient_accounts_hash_cache_path: accounts_hash_cache_path.join("transient"), + accounts_hash_cache_path, shrink_paths: RwLock::new(None), temp_paths: None, file_size: DEFAULT_FILE_SIZE, @@ -7626,18 +7624,20 @@ impl AccountsDb { fn get_cache_hash_data( accounts_hash_cache_path: PathBuf, config: &CalcAccountsHashConfig<'_>, + kind: CalcAccountsHashKind, slot: Slot, ) -> CacheHashData { - if !config.store_detailed_debug_info_on_failure { - CacheHashData::new(accounts_hash_cache_path) + let accounts_hash_cache_path = if !config.store_detailed_debug_info_on_failure { + accounts_hash_cache_path } else { // this path executes when we are failing with a hash mismatch let failed_dir = accounts_hash_cache_path .join("failed_calculate_accounts_hash_cache") .join(slot.to_string()); - let _ = std::fs::remove_dir_all(&failed_dir); - CacheHashData::new(failed_dir) - } + _ = 
std::fs::remove_dir_all(&failed_dir); + failed_dir + }; + CacheHashData::new(accounts_hash_cache_path, kind == CalcAccountsHashKind::Full) } // modeled after calculate_accounts_delta_hash @@ -7653,7 +7653,6 @@ impl AccountsDb { storages, stats, CalcAccountsHashKind::Full, - self.full_accounts_hash_cache_path.clone(), )?; let AccountsHashKind::Full(accounts_hash) = accounts_hash else { panic!("calculate_accounts_hash_from_storages must return a FullAccountsHash"); @@ -7681,7 +7680,6 @@ impl AccountsDb { storages, stats, CalcAccountsHashKind::Incremental, - self.incremental_accounts_hash_cache_path.clone(), )?; let AccountsHashKind::Incremental(incremental_accounts_hash) = accounts_hash else { panic!("calculate_incremental_accounts_hash must return an IncrementalAccountsHash"); @@ -7695,7 +7693,6 @@ impl AccountsDb { storages: &SortedStorages<'_>, mut stats: HashStats, kind: CalcAccountsHashKind, - accounts_hash_cache_path: PathBuf, ) -> Result<(AccountsHashKind, u64), AccountsHashVerificationError> { let total_time = Measure::start(""); let _guard = self.active_stats.activate(ActiveStatItem::Hash); @@ -7705,10 +7702,12 @@ impl AccountsDb { let slot = storages.max_slot_inclusive(); let use_bg_thread_pool = config.use_bg_thread_pool; + let accounts_hash_cache_path = self.accounts_hash_cache_path.clone(); let scan_and_hash = || { let (cache_hash_data, cache_hash_data_us) = measure_us!(Self::get_cache_hash_data( accounts_hash_cache_path, config, + kind, slot )); stats.cache_hash_data_us += cache_hash_data_us; @@ -9971,7 +9970,7 @@ pub mod tests { let temp_dir = TempDir::new().unwrap(); let accounts_hash_cache_path = temp_dir.path().to_path_buf(); self.scan_snapshot_stores_with_cache( - &CacheHashData::new(accounts_hash_cache_path), + &CacheHashData::new(accounts_hash_cache_path, true), storage, stats, bins, @@ -11011,7 +11010,7 @@ pub mod tests { }; let result = accounts_db.scan_account_storage_no_bank( - &CacheHashData::new(accounts_hash_cache_path), + 
&CacheHashData::new(accounts_hash_cache_path, true), &CalcAccountsHashConfig::default(), &get_storage_refs(&[storage]), test_scan, diff --git a/accounts-db/src/cache_hash_data.rs b/accounts-db/src/cache_hash_data.rs index 196474f49c13dd..50e85af9a89116 100644 --- a/accounts-db/src/cache_hash_data.rs +++ b/accounts-db/src/cache_hash_data.rs @@ -196,29 +196,32 @@ impl CacheHashDataFile { } } -pub type PreExistingCacheFiles = HashSet<PathBuf>; pub struct CacheHashData { cache_dir: PathBuf, - pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>, + pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>, + should_delete_old_cache_files_on_drop: bool, pub stats: Arc<CacheHashDataStats>, } impl Drop for CacheHashData { fn drop(&mut self) { - self.delete_old_cache_files(); + if self.should_delete_old_cache_files_on_drop { + self.delete_old_cache_files(); + } self.stats.report(); } } impl CacheHashData { - pub fn new(cache_dir: PathBuf) -> CacheHashData { + pub fn new(cache_dir: PathBuf, should_delete_old_cache_files_on_drop: bool) -> CacheHashData { std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| { panic!("error creating cache dir {}: {err}", cache_dir.display()) }); let result = CacheHashData { cache_dir, - pre_existing_cache_files: Arc::new(Mutex::new(PreExistingCacheFiles::default())), + pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())), + should_delete_old_cache_files_on_drop, stats: Arc::default(), }; @@ -281,7 +284,7 @@ impl CacheHashData { }) } - pub(crate) fn pre_existing_cache_file_will_be_used(&self, file_name: impl AsRef<Path>) { + fn pre_existing_cache_file_will_be_used(&self, file_name: impl AsRef<Path>) { self.pre_existing_cache_files .lock() .unwrap() @@ -424,7 +427,7 @@ mod tests { data_this_pass.push(this_bin_data); } } - let cache = CacheHashData::new(cache_dir.clone()); + let cache = CacheHashData::new(cache_dir.clone(), true); let file_name = PathBuf::from("test"); cache.save(&file_name, &data_this_pass).unwrap(); cache.get_cache_files();