From 2ede3e2cd1f4e7e8377c4a446552be80b89ac858 Mon Sep 17 00:00:00 2001 From: Steven Czabaniuk Date: Thu, 28 Sep 2023 13:38:58 +0200 Subject: [PATCH] Remove bzip2, gzip, and none from supported compression types These extra compression types are inferior for us and have caused us extra support burden (especially none). This change does retain the ArchiveFormat::Tar such that we can continue to skip compression for unit tests. However, "tar" has been removed from the possible values array which means passing "--snapshot-archive-format tar" will error out. --- Cargo.lock | 2 - core/src/snapshot_packager_service.rs | 2 +- download-utils/src/lib.rs | 8 +--- programs/sbf/Cargo.lock | 2 - rpc/src/rpc_service.rs | 19 +++------ runtime/Cargo.toml | 2 - runtime/src/bank/serde_snapshot.rs | 2 +- runtime/src/snapshot_bank_utils.rs | 2 +- runtime/src/snapshot_utils.rs | 42 ++------------------ runtime/src/snapshot_utils/archive_format.rs | 41 ++----------------- scripts/run.sh | 1 - 11 files changed, 15 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0de9f872c59c42..182ba452412f8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6812,12 +6812,10 @@ dependencies = [ "bv", "bytemuck", "byteorder", - "bzip2", "crossbeam-channel", "dashmap 4.0.2", "dir-diff", "ed25519-dalek", - "flate2", "fnv", "fs-err", "im", diff --git a/core/src/snapshot_packager_service.rs b/core/src/snapshot_packager_service.rs index 4c90a8bd18d793..4840e118231b1d 100644 --- a/core/src/snapshot_packager_service.rs +++ b/core/src/snapshot_packager_service.rs @@ -264,7 +264,7 @@ mod tests { let bank_snapshot_info = snapshot_utils::get_highest_bank_snapshot(&bank_snapshots_dir).unwrap(); let snapshot_storages = bank.get_snapshot_storages(None); - let archive_format = ArchiveFormat::TarBzip2; + let archive_format = ArchiveFormat::Tar; let full_archive = snapshot_bank_utils::package_and_archive_full_snapshot( &bank, diff --git a/download-utils/src/lib.rs b/download-utils/src/lib.rs index c42166437cf8ee..2082063ff5bc3f 100644 --- a/download-utils/src/lib.rs +++ b/download-utils/src/lib.rs @@ -283,13 +283,7 @@ pub fn download_snapshot_archive( }); fs::create_dir_all(&snapshot_archives_remote_dir).unwrap(); - for archive_format in [ - ArchiveFormat::TarZstd, - ArchiveFormat::TarGzip, - ArchiveFormat::TarBzip2, - ArchiveFormat::TarLz4, - ArchiveFormat::Tar, // `solana-test-validator` creates uncompressed snapshots - ] { + for archive_format in [ArchiveFormat::TarZstd, ArchiveFormat::TarLz4] { let destination_path = match snapshot_kind { SnapshotKind::FullSnapshot => snapshot_utils::build_full_snapshot_archive_path( &snapshot_archives_remote_dir, diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index b321cf18426da4..84ea4f661e7a7d 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -5509,11 +5509,9 @@ dependencies = [ "bv", "bytemuck", "byteorder 1.4.3", - "bzip2", "crossbeam-channel", "dashmap", "dir-diff", - "flate2", "fnv", "fs-err", "im", diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index 4fdde57c31a9fe..5d6fb2ac07baa3 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -745,7 +745,9 @@ mod tests { assert!(!rrm.is_file_get_path("//genesis.tar.bz2")); assert!(!rrm.is_file_get_path("/../genesis.tar.bz2")); - assert!(!rrm.is_file_get_path("/snapshot.tar.bz2")); // This is a redirect + // These two are redirects + assert!(!rrm.is_file_get_path("/snapshot.tar.bz2")); + assert!(!rrm.is_file_get_path("/incremental-snapshot.tar.bz2")); assert!(!rrm.is_file_get_path( "/snapshot-100-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" @@ -754,26 +756,15 @@ mod tests { "/incremental-snapshot-100-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" )); - assert!(rrm_with_snapshot_config.is_file_get_path( - "/snapshot-100-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" - )); assert!(rrm_with_snapshot_config.is_file_get_path( "/snapshot-100-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.zst" )); - assert!(rrm_with_snapshot_config - .is_file_get_path("/snapshot-100-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.gz")); assert!(rrm_with_snapshot_config .is_file_get_path("/snapshot-100-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar")); - assert!(rrm_with_snapshot_config.is_file_get_path( - "/incremental-snapshot-100-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" - )); assert!(rrm_with_snapshot_config.is_file_get_path( "/incremental-snapshot-100-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.zst" )); - assert!(rrm_with_snapshot_config.is_file_get_path( - "/incremental-snapshot-100-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.gz" - )); assert!(rrm_with_snapshot_config.is_file_get_path( "/incremental-snapshot-100-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar" )); @@ -782,10 +773,10 @@ mod tests { "/snapshot-notaslotnumber-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" )); assert!(!rrm_with_snapshot_config.is_file_get_path( - "/incremental-snapshot-notaslotnumber-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" + "/incremental-snapshot-notaslotnumber-200-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.zstd" )); assert!(!rrm_with_snapshot_config.is_file_get_path( - "/incremental-snapshot-100-notaslotnumber-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.bz2" + "/incremental-snapshot-100-notaslotnumber-AvFf9oS8A8U78HdjT9YG2sTTThLHJZmhaMn2g8vkWYnr.tar.zstd" )); assert!(!rrm_with_snapshot_config.is_file_get_path("../../../test/snapshot-123-xxx.tar")); diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 2d15c7acbace71..6d1091ff8b15c1 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -17,11 +17,9 @@ blake3 = { workspace = true } bv = { workspace = true, features = ["serde"] } bytemuck = { workspace = true } byteorder = { workspace = true } -bzip2 = { workspace = true } crossbeam-channel = { workspace = true } dashmap = { workspace = true, features = ["rayon", "raw-api"] } dir-diff = { workspace = true } -flate2 = { workspace = true } fnv = { workspace = true } fs-err = { workspace = true } im = { workspace = true, features = ["rayon", "serde"] } diff --git a/runtime/src/bank/serde_snapshot.rs b/runtime/src/bank/serde_snapshot.rs index 2e0bdd3ecc7ab6..671a6dc6d738e5 100644 --- a/runtime/src/bank/serde_snapshot.rs +++ b/runtime/src/bank/serde_snapshot.rs @@ -474,7 +474,7 @@ mod tests { None, full_snapshot_archives_dir.path(), incremental_snapshot_archives_dir.path(), - ArchiveFormat::TarBzip2, + ArchiveFormat::Tar, NonZeroUsize::new(1).unwrap(), NonZeroUsize::new(1).unwrap(), ) diff --git a/runtime/src/snapshot_bank_utils.rs b/runtime/src/snapshot_bank_utils.rs index 82d6f354a4e998..43b0ef5a364563 100644 --- a/runtime/src/snapshot_bank_utils.rs +++ b/runtime/src/snapshot_bank_utils.rs @@ -1408,7 +1408,7 @@ mod tests { let bank_snapshots_dir = tempfile::TempDir::new().unwrap(); let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap(); let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap(); - let snapshot_archive_format = ArchiveFormat::TarGzip; + let snapshot_archive_format = ArchiveFormat::Tar; let full_snapshot_archive_info = bank_to_full_snapshot_archive( bank_snapshots_dir.path(), diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 74c9b2421f4c99..9a115531187100 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -10,9 +10,7 @@ use { RebuiltSnapshotStorage, SnapshotStorageRebuilder, }, }, - bzip2::bufread::BzDecoder, crossbeam_channel::Sender, - flate2::read::GzDecoder, fs_err, lazy_static::lazy_static, log::*, @@ -70,8 +68,9 @@ pub const DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(2) }; pub const DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN: NonZeroUsize = unsafe { NonZeroUsize::new_unchecked(4) }; -pub const FULL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^snapshot-(?P[[:digit:]]+)-(?P[[:alnum:]]+)\.(?Ptar|tar\.bz2|tar\.zst|tar\.gz|tar\.lz4)$"; -pub const INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^incremental-snapshot-(?P[[:digit:]]+)-(?P[[:digit:]]+)-(?P[[:alnum:]]+)\.(?Ptar|tar\.bz2|tar\.zst|tar\.gz|tar\.lz4)$"; +pub const FULL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = + r"^snapshot-(?P[[:digit:]]+)-(?P[[:alnum:]]+)\.(?Ptar|tar\.zst|tar\.lz4)$"; +pub const INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^incremental-snapshot-(?P[[:digit:]]+)-(?P[[:digit:]]+)-(?P[[:alnum:]]+)\.(?Ptar|tar\.zst|tar\.lz4)$"; #[derive(Copy, Clone, Default, Eq, PartialEq, Debug)] pub enum SnapshotVersion { @@ -783,18 +782,6 @@ pub fn archive_snapshot_package( }; match snapshot_package.archive_format() { - ArchiveFormat::TarBzip2 => { - let mut encoder = - bzip2::write::BzEncoder::new(archive_file, bzip2::Compression::best()); - do_archive_files(&mut encoder)?; - encoder.finish()?; - } - ArchiveFormat::TarGzip => { - let mut encoder = - flate2::write::GzEncoder::new(archive_file, flate2::Compression::default()); - do_archive_files(&mut encoder)?; - encoder.finish()?; - } ArchiveFormat::TarZstd => { let mut encoder = zstd::stream::Encoder::new(archive_file, 0)?; do_archive_files(&mut encoder)?; @@ -1917,8 +1904,6 @@ fn untar_snapshot_create_shared_buffer( ) -> SharedBuffer { let open_file = || fs_err::File::open(snapshot_tar).unwrap(); match archive_format { - ArchiveFormat::TarBzip2 => SharedBuffer::new(BzDecoder::new(BufReader::new(open_file()))), - ArchiveFormat::TarGzip => SharedBuffer::new(GzDecoder::new(BufReader::new(open_file()))), ArchiveFormat::TarZstd => SharedBuffer::new( zstd::stream::read::Decoder::new(BufReader::new(open_file())).unwrap(), ), @@ -2339,14 +2324,6 @@ mod tests { #[test] fn test_parse_full_snapshot_archive_filename() { - assert_eq!( - parse_full_snapshot_archive_filename(&format!( - "snapshot-42-{}.tar.bz2", - Hash::default() - )) - .unwrap(), - (42, SnapshotHash(Hash::default()), ArchiveFormat::TarBzip2) - ); assert_eq!( parse_full_snapshot_archive_filename(&format!( "snapshot-43-{}.tar.zst", @@ -2411,19 +2388,6 @@ mod tests { #[test] fn test_parse_incremental_snapshot_archive_filename() { - assert_eq!( - parse_incremental_snapshot_archive_filename(&format!( - "incremental-snapshot-42-123-{}.tar.bz2", - Hash::default() - )) - .unwrap(), - ( - 42, - 123, - SnapshotHash(Hash::default()), - ArchiveFormat::TarBzip2 - ) - ); assert_eq!( parse_incremental_snapshot_archive_filename(&format!( "incremental-snapshot-43-234-{}.tar.zst", diff --git a/runtime/src/snapshot_utils/archive_format.rs b/runtime/src/snapshot_utils/archive_format.rs index 4ef271cbb5bf3a..a3d1b609a4a4a0 100644 --- a/runtime/src/snapshot_utils/archive_format.rs +++ b/runtime/src/snapshot_utils/archive_format.rs @@ -3,11 +3,10 @@ use { strum::Display, }; -pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["bz2", "gzip", "zstd", "lz4", "tar", "none"]; +// Publicly support "zstd" and "lz4", but retain support for "tar" for unit tests +pub const SUPPORTED_ARCHIVE_COMPRESSION: &[&str] = &["zstd", "lz4"]; pub const DEFAULT_ARCHIVE_COMPRESSION: &str = "zstd"; -pub const TAR_BZIP2_EXTENSION: &str = "tar.bz2"; -pub const TAR_GZIP_EXTENSION: &str = "tar.gz"; pub const TAR_ZSTD_EXTENSION: &str = "tar.zst"; pub const TAR_LZ4_EXTENSION: &str = "tar.lz4"; pub const TAR_EXTENSION: &str = "tar"; @@ -15,8 +14,6 @@ pub const TAR_EXTENSION: &str = "tar"; /// The different archive formats used for snapshots #[derive(Copy, Clone, Debug, Eq, PartialEq, Display)] pub enum ArchiveFormat { - TarBzip2, - TarGzip, TarZstd, TarLz4, Tar, @@ -26,8 +23,6 @@ impl ArchiveFormat { /// Get the file extension for the ArchiveFormat pub fn extension(&self) -> &str { match self { - ArchiveFormat::TarBzip2 => TAR_BZIP2_EXTENSION, - ArchiveFormat::TarGzip => TAR_GZIP_EXTENSION, ArchiveFormat::TarZstd => TAR_ZSTD_EXTENSION, ArchiveFormat::TarLz4 => TAR_LZ4_EXTENSION, ArchiveFormat::Tar => TAR_EXTENSION, @@ -36,11 +31,8 @@ impl ArchiveFormat { pub fn from_cli_arg(archive_format_str: &str) -> Option { match archive_format_str { - "bz2" => Some(ArchiveFormat::TarBzip2), - "gzip" => Some(ArchiveFormat::TarGzip), "zstd" => Some(ArchiveFormat::TarZstd), "lz4" => Some(ArchiveFormat::TarLz4), - "tar" | "none" => Some(ArchiveFormat::Tar), _ => None, } } @@ -53,8 +45,6 @@ impl TryFrom<&str> for ArchiveFormat { fn try_from(extension: &str) -> Result { match extension { - TAR_BZIP2_EXTENSION => Ok(ArchiveFormat::TarBzip2), - TAR_GZIP_EXTENSION => Ok(ArchiveFormat::TarGzip), TAR_ZSTD_EXTENSION => Ok(ArchiveFormat::TarZstd), TAR_LZ4_EXTENSION => Ok(ArchiveFormat::TarLz4), TAR_EXTENSION => Ok(ArchiveFormat::Tar), @@ -93,8 +83,6 @@ mod tests { #[test] fn test_extension() { - assert_eq!(ArchiveFormat::TarBzip2.extension(), TAR_BZIP2_EXTENSION); - assert_eq!(ArchiveFormat::TarGzip.extension(), TAR_GZIP_EXTENSION); assert_eq!(ArchiveFormat::TarZstd.extension(), TAR_ZSTD_EXTENSION); assert_eq!(ArchiveFormat::TarLz4.extension(), TAR_LZ4_EXTENSION); assert_eq!(ArchiveFormat::Tar.extension(), TAR_EXTENSION); @@ -102,14 +90,6 @@ mod tests { #[test] fn test_try_from() { - assert_eq!( - ArchiveFormat::try_from(TAR_BZIP2_EXTENSION), - Ok(ArchiveFormat::TarBzip2) - ); - assert_eq!( - ArchiveFormat::try_from(TAR_GZIP_EXTENSION), - Ok(ArchiveFormat::TarGzip) - ); assert_eq!( ArchiveFormat::try_from(TAR_ZSTD_EXTENSION), Ok(ArchiveFormat::TarZstd) @@ -130,14 +110,6 @@ mod tests { #[test] fn test_from_str() { - assert_eq!( - ArchiveFormat::from_str(TAR_BZIP2_EXTENSION), - Ok(ArchiveFormat::TarBzip2) - ); - assert_eq!( - ArchiveFormat::from_str(TAR_GZIP_EXTENSION), - Ok(ArchiveFormat::TarGzip) - ); assert_eq!( ArchiveFormat::from_str(TAR_ZSTD_EXTENSION), Ok(ArchiveFormat::TarZstd) @@ -158,14 +130,7 @@ mod tests { #[test] fn test_from_cli_arg() { - let golden = [ - Some(ArchiveFormat::TarBzip2), - Some(ArchiveFormat::TarGzip), - Some(ArchiveFormat::TarZstd), - Some(ArchiveFormat::TarLz4), - Some(ArchiveFormat::Tar), - Some(ArchiveFormat::Tar), - ]; + let golden = [Some(ArchiveFormat::TarZstd), Some(ArchiveFormat::TarLz4)]; for (arg, expected) in zip(SUPPORTED_ARCHIVE_COMPRESSION.iter(), golden.into_iter()) { assert_eq!(ArchiveFormat::from_cli_arg(arg), expected); diff --git a/scripts/run.sh b/scripts/run.sh index a890aa10c17474..699bfce3e253e3 100755 --- a/scripts/run.sh +++ b/scripts/run.sh @@ -110,7 +110,6 @@ args=( --enable-rpc-transaction-history --enable-extended-tx-metadata-storage --init-complete-file "$dataDir"/init-completed - --snapshot-compression none --require-tower --no-wait-for-vote-to-start-leader --no-os-network-limits-test