From aaafef74803ca4e2050b8cd83ef352c5a59509b4 Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Mon, 10 Apr 2023 10:40:25 -0700 Subject: [PATCH 1/6] Add remove_incomplete_bank_snapshot_dir --- core/src/validator.rs | 13 ++++++-- ledger-tool/src/main.rs | 19 ++++++----- runtime/src/snapshot_utils.rs | 59 +++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 9 deletions(-) diff --git a/core/src/validator.rs b/core/src/validator.rs index cd48735b919615..3f05c54e33a3b7 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -90,7 +90,7 @@ use { snapshot_archive_info::SnapshotArchiveInfoGetter, snapshot_config::SnapshotConfig, snapshot_hash::StartingSnapshotHashes, - snapshot_utils::{self, clean_orphaned_account_snapshot_dirs, move_and_async_delete_path}, + snapshot_utils::{self, move_and_async_delete_path}, }, solana_sdk::{ clock::Slot, @@ -553,8 +553,17 @@ impl Validator { start.stop(); info!("done. {}", start); + info!("Cleaning up incomplete snapshot dirs.."); + if let Err(e) = snapshot_utils::remove_incomplete_bank_snapshot_dir( + &config.snapshot_config.bank_snapshots_dir, + ) { + return Err(format!( + "Failed to clean up incomplete snapshot dirs: {e:?}" + )); + } + info!("Cleaning orphaned account snapshot directories.."); - if let Err(e) = clean_orphaned_account_snapshot_dirs( + if let Err(e) = snapshot_utils::clean_orphaned_account_snapshot_dirs( &config.snapshot_config.bank_snapshots_dir, &config.account_snapshot_paths, ) { diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index ed3c7b38f41ab3..a4824c01771ce9 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -74,9 +74,9 @@ use { snapshot_hash::StartingSnapshotHashes, snapshot_minimizer::SnapshotMinimizer, snapshot_utils::{ - self, clean_orphaned_account_snapshot_dirs, create_all_accounts_run_and_snapshot_dirs, - move_and_async_delete_path, ArchiveFormat, SnapshotVersion, - DEFAULT_ARCHIVE_COMPRESSION, SUPPORTED_ARCHIVE_COMPRESSION, + self, create_all_accounts_run_and_snapshot_dirs, move_and_async_delete_path, + ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION, + SUPPORTED_ARCHIVE_COMPRESSION, }, }, solana_sdk::{ @@ -1205,14 +1205,19 @@ fn load_bank_forks( }); measure.stop(); info!("done. {}", measure); - + info!("Cleaning up incomplete snapshot dirs.."); + if let Err(e) = snapshot_utils::remove_incomplete_bank_snapshot_dir(&bank_snapshots_dir) { + eprintln!("Failed to clean up incomplete snapshot dirs: {e:?}"); + exit(1); + } info!( "Cleaning contents of account snapshot paths: {:?}", account_snapshot_paths ); - if let Err(e) = - clean_orphaned_account_snapshot_dirs(&bank_snapshots_dir, &account_snapshot_paths) - { + if let Err(e) = snapshot_utils::clean_orphaned_account_snapshot_dirs( + &bank_snapshots_dir, + &account_snapshot_paths, + ) { eprintln!("Failed to clean orphaned account snapshot dirs. Error: {e:?}"); exit(1); } diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 0850de933d3fdf..a0b96751a9fcd8 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -788,6 +788,65 @@ pub fn get_bank_snapshots(bank_snapshots_dir: impl AsRef) -> Vec) -> Result<()> { + let entries = fs::read_dir(&bank_snapshots_dir).map_err(|err| { + SnapshotError::IoWithSourceAndFile( + err, + "Unable to read bank snapshots directory", + bank_snapshots_dir.as_ref().to_path_buf(), + ) + })?; + + let mut final_result = Ok(()); + + for entry in entries { + let entry = match entry { + Ok(entry) => entry, + Err(_) => continue, + }; + + if !entry.path().is_dir() { + continue; + } + + let slot = match entry.path().file_name().and_then(|file_name| { + file_name + .to_str() + .and_then(|file_name| file_name.parse::().ok()) + }) { + Some(slot) => slot, + None => continue, + }; + + let bank_snapshot_dir = get_bank_snapshots_dir(&bank_snapshots_dir, slot); + let file_complete = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); + if file_complete.exists() { + continue; + } + + match fs::remove_dir_all(bank_snapshot_dir) { + Ok(()) => {} + Err(err) => { + let snapshot_error = SnapshotError::IoWithSourceAndFile( + err, + "Unable to remove bank snapshots directory", + bank_snapshots_dir.as_ref().to_path_buf(), + ); + + if final_result.is_ok() { + final_result = Err(snapshot_error); + } + } + } + } + + final_result +} + /// Get the bank snapshots in a directory /// /// This function retains only the bank snapshots of type BankSnapshotType::Pre From 4d814bcedcc1e31599c783f57edb2764af7c6872 Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Mon, 10 Apr 2023 22:42:33 -0700 Subject: [PATCH 2/6] Remove incomplete snapshot dir if incomplete --- runtime/src/snapshot_utils.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index a0b96751a9fcd8..d84aa8c0635ef1 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -204,6 +204,7 @@ impl BankSnapshotInfo { // filled. Check the completion to avoid using a highest found slot directory with missing content. let completion_flag_file = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); if !fs::metadata(&completion_flag_file)?.is_file() { + fs::remove_dir_all(bank_snapshot_dir)?; return Err(SnapshotNewFromDirError::IncompleteDir(completion_flag_file)); } From 8538fbc910d0fdf5278557e660125937255a85e4 Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Mon, 10 Apr 2023 22:42:49 -0700 Subject: [PATCH 3/6] Revert "Add remove_incomplete_bank_snapshot_dir" This reverts commit aaafef74803ca4e2050b8cd83ef352c5a59509b4. --- core/src/validator.rs | 13 ++------ ledger-tool/src/main.rs | 19 +++++------ runtime/src/snapshot_utils.rs | 59 ----------------------------------- 3 files changed, 9 insertions(+), 82 deletions(-) diff --git a/core/src/validator.rs b/core/src/validator.rs index 3f05c54e33a3b7..cd48735b919615 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -90,7 +90,7 @@ use { snapshot_archive_info::SnapshotArchiveInfoGetter, snapshot_config::SnapshotConfig, snapshot_hash::StartingSnapshotHashes, - snapshot_utils::{self, move_and_async_delete_path}, + snapshot_utils::{self, clean_orphaned_account_snapshot_dirs, move_and_async_delete_path}, }, solana_sdk::{ clock::Slot, @@ -553,17 +553,8 @@ impl Validator { start.stop(); info!("done. {}", start); - info!("Cleaning up incomplete snapshot dirs.."); - if let Err(e) = snapshot_utils::remove_incomplete_bank_snapshot_dir( - &config.snapshot_config.bank_snapshots_dir, - ) { - return Err(format!( - "Failed to clean up incomplete snapshot dirs: {e:?}" - )); - } - info!("Cleaning orphaned account snapshot directories.."); - if let Err(e) = snapshot_utils::clean_orphaned_account_snapshot_dirs( + if let Err(e) = clean_orphaned_account_snapshot_dirs( &config.snapshot_config.bank_snapshots_dir, &config.account_snapshot_paths, ) { diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index a4824c01771ce9..ed3c7b38f41ab3 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -74,9 +74,9 @@ use { snapshot_hash::StartingSnapshotHashes, snapshot_minimizer::SnapshotMinimizer, snapshot_utils::{ - self, create_all_accounts_run_and_snapshot_dirs, move_and_async_delete_path, - ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION, - SUPPORTED_ARCHIVE_COMPRESSION, + self, clean_orphaned_account_snapshot_dirs, create_all_accounts_run_and_snapshot_dirs, + move_and_async_delete_path, ArchiveFormat, SnapshotVersion, + DEFAULT_ARCHIVE_COMPRESSION, SUPPORTED_ARCHIVE_COMPRESSION, }, }, solana_sdk::{ @@ -1205,19 +1205,14 @@ fn load_bank_forks( }); measure.stop(); info!("done. {}", measure); - info!("Cleaning up incomplete snapshot dirs.."); - if let Err(e) = snapshot_utils::remove_incomplete_bank_snapshot_dir(&bank_snapshots_dir) { - eprintln!("Failed to clean up incomplete snapshot dirs: {e:?}"); - exit(1); - } + info!( "Cleaning contents of account snapshot paths: {:?}", account_snapshot_paths ); - if let Err(e) = snapshot_utils::clean_orphaned_account_snapshot_dirs( - &bank_snapshots_dir, - &account_snapshot_paths, - ) { + if let Err(e) = + clean_orphaned_account_snapshot_dirs(&bank_snapshots_dir, &account_snapshot_paths) + { eprintln!("Failed to clean orphaned account snapshot dirs. Error: {e:?}"); exit(1); } diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index d84aa8c0635ef1..dac83dd165b04f 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -789,65 +789,6 @@ pub fn get_bank_snapshots(bank_snapshots_dir: impl AsRef) -> Vec) -> Result<()> { - let entries = fs::read_dir(&bank_snapshots_dir).map_err(|err| { - SnapshotError::IoWithSourceAndFile( - err, - "Unable to read bank snapshots directory", - bank_snapshots_dir.as_ref().to_path_buf(), - ) - })?; - - let mut final_result = Ok(()); - - for entry in entries { - let entry = match entry { - Ok(entry) => entry, - Err(_) => continue, - }; - - if !entry.path().is_dir() { - continue; - } - - let slot = match entry.path().file_name().and_then(|file_name| { - file_name - .to_str() - .and_then(|file_name| file_name.parse::().ok()) - }) { - Some(slot) => slot, - None => continue, - }; - - let bank_snapshot_dir = get_bank_snapshots_dir(&bank_snapshots_dir, slot); - let file_complete = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); - if file_complete.exists() { - continue; - } - - match fs::remove_dir_all(bank_snapshot_dir) { - Ok(()) => {} - Err(err) => { - let snapshot_error = SnapshotError::IoWithSourceAndFile( - err, - "Unable to remove bank snapshots directory", - bank_snapshots_dir.as_ref().to_path_buf(), - ); - - if final_result.is_ok() { - final_result = Err(snapshot_error); - } - } - } - } - - final_result -} - /// Get the bank snapshots in a directory /// /// This function retains only the bank snapshots of type BankSnapshotType::Pre From 7f5f348d80c20399066a7a05c56ff78a9c795c6f Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Tue, 11 Apr 2023 13:10:52 -0700 Subject: [PATCH 4/6] Replace metadata's is_file with simple PathBuf's is_file --- runtime/src/snapshot_utils.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index dac83dd165b04f..527ec9555813a2 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -203,7 +203,7 @@ impl BankSnapshotInfo { // There is a time window from the slot directory being created, and the content being completely // filled. Check the completion to avoid using a highest found slot directory with missing content. let completion_flag_file = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); - if !fs::metadata(&completion_flag_file)?.is_file() { + if !completion_flag_file.is_file() { fs::remove_dir_all(bank_snapshot_dir)?; return Err(SnapshotNewFromDirError::IncompleteDir(completion_flag_file)); } From 46f4f2ed280e2e41264b1adafca1b1bd7d4974cd Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Tue, 11 Apr 2023 14:38:53 -0700 Subject: [PATCH 5/6] In test verify the deletion of the incomplete snapshot dir --- runtime/src/snapshot_utils.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 527ec9555813a2..3464bb77960f6c 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -5090,8 +5090,13 @@ mod tests { let complete_flag_file = snapshot.snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); fs::remove_file(complete_flag_file).unwrap(); + // The incomplete snapshot dir should still exist + let snapshot_dir_4 = snapshot.snapshot_dir; + assert!(snapshot_dir_4.exists()); let snapshot = get_highest_bank_snapshot(bank_snapshots_dir).unwrap(); assert_eq!(snapshot.slot, 3); + // The incomplete snapshot dir should have been deleted + assert!(!snapshot_dir_4.exists()); let snapshot_version_file = snapshot.snapshot_dir.join(SNAPSHOT_VERSION_FILENAME); fs::remove_file(snapshot_version_file).unwrap(); From b9637755b4b376ad9aa0a8ffb433f17ee903ee1e Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Tue, 11 Apr 2023 14:47:27 -0700 Subject: [PATCH 6/6] Add comments to explain the snapshot dir cleanup --- runtime/src/snapshot_utils.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 3464bb77960f6c..f881d5286e360b 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -204,6 +204,10 @@ impl BankSnapshotInfo { // filled. Check the completion to avoid using a highest found slot directory with missing content. let completion_flag_file = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); if !completion_flag_file.is_file() { + // If the directory is incomplete, it should be removed. + // There are also possible hardlink files under /snapshot//, referred by this + // snapshot dir's symlinks. They are cleaned up in clean_orphaned_account_snapshot_dirs() at the + // boot time. fs::remove_dir_all(bank_snapshot_dir)?; return Err(SnapshotNewFromDirError::IncompleteDir(completion_flag_file)); }