Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.18: Adds cache hash data deletion policy enum (backport of #34956) #34962

Merged
merged 1 commit into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ use {
append_vec::{
aligned_stored_size, AppendVec, APPEND_VEC_MMAPPED_FILES_OPEN, STORE_META_OVERHEAD,
},
cache_hash_data::{CacheHashData, CacheHashDataFileReference},
cache_hash_data::{
CacheHashData, CacheHashDataFileReference, DeletionPolicy as CacheHashDeletionPolicy,
},
contains::Contains,
epoch_accounts_hash::EpochAccountsHashManager,
in_mem_accounts_index::StartupStats,
Expand Down Expand Up @@ -7549,10 +7551,13 @@ impl AccountsDb {
_ = std::fs::remove_dir_all(&failed_dir);
failed_dir
};
CacheHashData::new(
accounts_hash_cache_path,
(kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot),
)
let deletion_policy = match kind {
CalcAccountsHashKind::Full => CacheHashDeletionPolicy::AllUnused,
CalcAccountsHashKind::Incremental => {
CacheHashDeletionPolicy::UnusedAtLeast(storages_start_slot)
}
};
CacheHashData::new(accounts_hash_cache_path, deletion_policy)
}

// modeled after calculate_accounts_delta_hash
Expand Down Expand Up @@ -9775,7 +9780,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path().to_path_buf();
self.scan_snapshot_stores_with_cache(
&CacheHashData::new(accounts_hash_cache_path, None),
&CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
storage,
stats,
bins,
Expand Down Expand Up @@ -10843,7 +10848,7 @@ pub mod tests {
};

let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(accounts_hash_cache_path, None),
&CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
&CalcAccountsHashConfig::default(),
&get_storage_refs(&[storage]),
test_scan,
Expand Down
56 changes: 36 additions & 20 deletions accounts-db/src/cache_hash_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,7 @@ impl CacheHashDataFile {
pub(crate) struct CacheHashData {
cache_dir: PathBuf,
pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
/// Decides which old cache files to delete. See `delete_old_cache_files()` for more info.
storages_start_slot: Option<Slot>,
deletion_policy: DeletionPolicy,
pub stats: Arc<CacheHashDataStats>,
}

Expand All @@ -206,15 +205,15 @@ impl Drop for CacheHashData {
}

impl CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option<Slot>) -> CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, deletion_policy: DeletionPolicy) -> CacheHashData {
std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
panic!("error creating cache dir {}: {err}", cache_dir.display())
});

let result = CacheHashData {
cache_dir,
pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
storages_start_slot,
deletion_policy,
stats: Arc::default(),
};

Expand All @@ -229,21 +228,24 @@ impl CacheHashData {
let mut old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());

// If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus
// all unused cache files can be deleted.
// If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation,
// and we only want to delete the unused cache files *that IAH considered*.
if let Some(storages_start_slot) = self.storages_start_slot {
old_cache_files.retain(|old_cache_file| {
let Some(parsed_filename) = parse_filename(old_cache_file) else {
// if parsing the cache filename fails, we *do* want to delete it
return true;
};

// if the old cache file is in the incremental accounts hash calculation range,
// then delete it
parsed_filename.slot_range_start >= storages_start_slot
});
match self.deletion_policy {
DeletionPolicy::AllUnused => {
// no additional work to do here; we will delete everything in `old_cache_files`
}
DeletionPolicy::UnusedAtLeast(storages_start_slot) => {
// when calculating an incremental accounts hash, we only want to delete the unused
// cache files *that IAH considered*
old_cache_files.retain(|old_cache_file| {
let Some(parsed_filename) = parse_filename(old_cache_file) else {
// if parsing the cache filename fails, we *do* want to delete it
return true;
};

// if the old cache file is in the incremental accounts hash calculation range,
// then delete it
parsed_filename.slot_range_start >= storages_start_slot
});
}
}

if !old_cache_files.is_empty() {
Expand Down Expand Up @@ -410,6 +412,19 @@ fn parse_filename(cache_filename: impl AsRef<Path>) -> Option<ParsedFilename> {
})
}

/// Decides which old cache files to delete
///
/// See `delete_old_cache_files()` for more info.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum DeletionPolicy {
    /// Delete *all* the unused cache files
    /// Should be used when calculating the full accounts hash, since every
    /// storage is considered and any cache file left unused is stale
    AllUnused,
    /// Delete *only* the unused cache files whose starting slot range is *at least* this slot
    /// (cache files whose names fail to parse are deleted as well)
    /// Should be used when calculating an incremental accounts hash, so that
    /// unused cache files outside the incremental calculation's slot range are preserved
    UnusedAtLeast(Slot),
}

#[cfg(test)]
mod tests {
use {super::*, crate::accounts_hash::AccountHash, rand::Rng};
Expand Down Expand Up @@ -477,7 +492,8 @@ mod tests {
data_this_pass.push(this_bin_data);
}
}
let cache = CacheHashData::new(cache_dir.clone(), None);
let cache =
CacheHashData::new(cache_dir.clone(), DeletionPolicy::AllUnused);
let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files();
Expand Down
Loading