From 8d08780af8d1731a6ee0f22e779bd3dd53eed5e6 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 20 Jun 2023 17:03:56 +1000 Subject: [PATCH 01/43] Move format upgrades to their own module and enum --- .../src/service/finalized_state/disk_db.rs | 65 +-------- .../service/finalized_state/disk_format.rs | 1 + .../finalized_state/disk_format/upgrade.rs | 138 ++++++++++++++++++ 3 files changed, 147 insertions(+), 57 deletions(-) create mode 100644 zebra-state/src/service/finalized_state/disk_format/upgrade.rs diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 7e8ebe44662..79a0ff56fb4 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -10,14 +10,7 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::{ - cmp::Ordering, - collections::{BTreeMap, HashMap}, - fmt::Debug, - ops::RangeBounds, - path::Path, - sync::Arc, -}; +use std::{fmt::Debug, path::Path, sync::Arc}; use itertools::Itertools; use rlimit::increase_nofile_limit; @@ -25,11 +18,8 @@ use rlimit::increase_nofile_limit; use zebra_chain::parameters::Network; use crate::{ - config::{ - database_format_version_in_code, database_format_version_on_disk, - write_database_format_version_to_disk, - }, - service::finalized_state::disk_format::{FromDisk, IntoDisk}, + config::{database_format_version_in_code, database_format_version_on_disk}, + service::finalized_state::disk_format::{upgrade::DbFormatChange, FromDisk, IntoDisk}, Config, }; @@ -532,30 +522,8 @@ impl DiskDb { let disk_version = database_format_version_on_disk(config, network) .expect("unable to read database format version file"); - match disk_version.as_ref().map(|disk| disk.cmp(&running_version)) { - // TODO: if the on-disk format is older, actually run the upgrade task after the - // database has been opened (#6642) - Some(Ordering::Less) => info!( - ?running_version, - ?disk_version, - "trying to open older database format: launching upgrade task" - ), - // TODO: if the on-disk format is newer, downgrade the version after the - // database has been opened (#6642) - Some(Ordering::Greater) => info!( - ?running_version, - ?disk_version, - "trying to open newer database format: data should be compatible" - ), - Some(Ordering::Equal) => info!( - ?running_version, - "trying to open compatible database format" - ), - None => info!( - ?running_version, - "creating new database with the current format" - ), - } + // Log any format changes before opening, in case opening fails. + let format_change = DbFormatChange::new(running_version, disk_version); let db_options = DiskDb::options(); @@ -590,26 +558,9 @@ impl DiskDb { db.assert_default_cf_is_empty(); - // Now we've checked that the database format is up-to-date, - // mark it as updated on disk. - // - // # Concurrency - // - // The version must only be updated while RocksDB is holding the database - // directory lock. This prevents multiple Zebra instances corrupting the version - // file. - // - // # TODO - // - // - only update the version at the end of the format upgrade task (#6642) - // - add a note to the format upgrade task code to update the version constants - // whenever the format changes - // - add a test that the format upgrade runs exactly once when: - // 1. if an older cached state format is opened, the format is upgraded, - // then if Zebra is launched again the format is not upgraded - // 2. if the current cached state format is opened, the format is not upgraded - write_database_format_version_to_disk(config, network) - .expect("unable to write database format version file to disk"); + if let Some(format_change) = format_change { + format_change.apply_format_change(config, network); + } db } diff --git a/zebra-state/src/service/finalized_state/disk_format.rs b/zebra-state/src/service/finalized_state/disk_format.rs index e731ff20d8d..716792f1cb1 100644 --- a/zebra-state/src/service/finalized_state/disk_format.rs +++ b/zebra-state/src/service/finalized_state/disk_format.rs @@ -11,6 +11,7 @@ pub mod block; pub mod chain; pub mod shielded; pub mod transparent; +pub mod upgrade; #[cfg(test)] mod tests; diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs new file mode 100644 index 00000000000..a0d32b6e03f --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -0,0 +1,138 @@ +//! In-place format upgrades for the Zebra state database. + +use std::cmp::Ordering; + +use semver::Version; + +use zebra_chain::parameters::Network; +use DbFormatChange::*; + +use crate::{config::write_database_format_version_to_disk, Config}; + +/// The kind of database format change we're performing. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum DbFormatChange { + /// Upgrading the format from `disk_version` to `running_version`. + /// + /// Until this upgrade is complete, the format is a mixture of both versions. + Upgrade { + disk_version: Version, + running_version: Version, + }, + + /// Marking the format as downgraded from `disk_version` to `running_version`. + /// + /// Until the state is upgraded to `disk_version` by a later Zebra version, + /// the format will be a mixture of both versions. + Downgrade { + disk_version: Version, + running_version: Version, + }, +} + +impl DbFormatChange { + /// Check if loading `disk_version` into `running_version` needs a format change, + /// and if it does, return the required format change. + /// + /// Also logs the kind of change at info level. + /// + /// If `disk_version` is `None`, Zebra is creating a new database. + pub fn new(running_version: Version, disk_version: Option) -> Option { + let Some(disk_version) = disk_version else { + info!( + ?running_version, + "creating new database with the current format" + ); + + return None; + }; + + match disk_version.cmp(&running_version) { + Ordering::Less => { + info!( + ?running_version, + ?disk_version, + "trying to open older database format: launching upgrade task" + ); + + Some(Upgrade { + disk_version, + running_version, + }) + } + Ordering::Greater => { + info!( + ?running_version, + ?disk_version, + "trying to open newer database format: data should be compatible" + ); + + Some(Downgrade { + disk_version, + running_version, + }) + } + Ordering::Equal => { + info!( + ?running_version, + "trying to open compatible database format" + ); + + None + } + } + } + + /// Returns true if this change is an upgrade. + pub fn is_upgrade(&self) -> bool { + matches!(self, Upgrade { .. }) + } + + /// Apply this format change to the database. + /// Format changes should be launched in an independent `std::thread`. + // + // New format changes must be added to the *end* of this method. + pub fn apply_format_change(&self, config: &Config, network: Network) { + if !self.is_upgrade() { + // # Correctness + // + // At the start of a format downgrade, the database must be marked as partially or + // fully downgraded. + Self::mark_as_changed(config, network); + + // Older supported versions just assume they can read newer formats, + // because they can't predict all changes a newer Zebra version could make. + // + // The resposibility of staying backwards-compatible is on the newer version. + // We do this on a best-effort basis for versions that are still supported. + return; + } + + // # TODO: link to format upgrade instructions doc here + + // # Correctness + // + // New code goes above this comment! + // + // Run the latest format upgrade code after the other upgrades are complete, + // but before marking the format as upgraded. + + // At the end of a format upgrade, the database is marked as fully upgraded. + // Upgrades can be run more than once if Zebra is restarted, so this is just a performance + // optimisation. + Self::mark_as_changed(config, network); + } + + /// Mark the database as fully upgraded. + /// This should be called after database format is up-to-date. + /// + /// # Concurrency + /// + /// The version must only be updated while RocksDB is holding the database + /// directory lock. This prevents multiple Zebra instances corrupting the version + /// file. + fn mark_as_changed(config: &Config, network: Network) { + write_database_format_version_to_disk(config, network) + .expect("unable to write database format version file to disk"); + } +} From 3d8cda00e01b3df1534d8cae0119611ef68d881b Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 20 Jun 2023 18:49:14 +1000 Subject: [PATCH 02/43] Launch a format change thread if needed, and shut it down during shutdown --- .../src/service/finalized_state/disk_db.rs | 32 ++-- .../finalized_state/disk_format/upgrade.rs | 142 +++++++++++++++++- .../src/service/finalized_state/zebra_db.rs | 60 +++++++- 3 files changed, 207 insertions(+), 27 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 79a0ff56fb4..28f7cb6416b 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -7,7 +7,7 @@ //! //! # Correctness //! -//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must +//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constants must //! be incremented each time the database format (column, serialization, etc) changes. use std::{fmt::Debug, path::Path, sync::Arc}; @@ -18,8 +18,7 @@ use rlimit::increase_nofile_limit; use zebra_chain::parameters::Network; use crate::{ - config::{database_format_version_in_code, database_format_version_on_disk}, - service::finalized_state::disk_format::{upgrade::DbFormatChange, FromDisk, IntoDisk}, + service::finalized_state::disk_format::{FromDisk, IntoDisk}, Config, }; @@ -518,13 +517,6 @@ impl DiskDb { pub fn new(config: &Config, network: Network) -> DiskDb { let path = config.db_path(network); - let running_version = database_format_version_in_code(); - let disk_version = database_format_version_on_disk(config, network) - .expect("unable to read database format version file"); - - // Log any format changes before opening, in case opening fails. - let format_change = DbFormatChange::new(running_version, disk_version); - let db_options = DiskDb::options(); // When opening the database in read/write mode, all column families must be opened. @@ -558,10 +550,6 @@ impl DiskDb { db.assert_default_cf_is_empty(); - if let Some(format_change) = format_change { - format_change.apply_format_change(config, network); - } - db } @@ -760,6 +748,19 @@ impl DiskDb { // Cleanup methods + /// Returns the number of shared instances of this database. + /// + /// # Concurrency + /// + /// The actual number of owners can be higher or lower than the returned value, + /// because databases can simultaneously be cloned or dropped in other threads. + /// + /// However, if the number of owners is 1, and the caller has exclusive access, + /// the count can't increase unless that caller clones the database. + pub(crate) fn shared_database_owners(&self) -> usize { + Arc::strong_count(&self.db) + Arc::weak_count(&self.db) + } + /// Shut down the database, cleaning up background tasks and ephemeral data. /// /// If `force` is true, clean up regardless of any shared references. @@ -780,9 +781,8 @@ impl DiskDb { // instance. If they do, they must drop it before: // - shutting down database threads, or // - deleting database files. - let shared_database_owners = Arc::strong_count(&self.db) + Arc::weak_count(&self.db); - if shared_database_owners > 1 { + if self.shared_database_owners() > 1 { let path = self.path(); let mut ephemeral_note = ""; diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index a0d32b6e03f..42fd80b98f7 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -1,10 +1,16 @@ //! In-place format upgrades for the Zebra state database. -use std::cmp::Ordering; +use std::{ + cmp::Ordering, + sync::{mpsc, Arc}, + thread::{self, JoinHandle}, +}; use semver::Version; +use tracing::Span; + +use zebra_chain::{block::Height, parameters::Network}; -use zebra_chain::parameters::Network; use DbFormatChange::*; use crate::{config::write_database_format_version_to_disk, Config}; @@ -30,6 +36,31 @@ pub enum DbFormatChange { }, } +/// A handle to a spawned format change thread. +/// +/// Cloning this struct creates an additional handle to the same thread. +/// +/// # Concurrency +/// +/// Cancelling the thread on drop has a race condition, because two handles can be dropped at +/// the same time. +/// +/// If cancelling the thread is important, the owner of the handle must call force_cancel(). +#[derive(Clone, Debug)] +pub struct DbFormatChangeThreadHandle { + /// A handle that can wait for the running format change thread to finish. + /// + /// Panics from this thread are propagated into Zebra's state service. + join_handle: Arc>, + + /// A channel that tells the running format thread to finish early. + cancel_handle: mpsc::SyncSender, +} + +/// Marker for cancelling a format upgrade. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct CancelFormatChange; + impl DbFormatChange { /// Check if loading `disk_version` into `running_version` needs a format change, /// and if it does, return the required format change. @@ -88,17 +119,53 @@ impl DbFormatChange { matches!(self, Upgrade { .. }) } + /// Launch a `std::thread` that applies this format change to the database. + pub fn spawn_format_change( + self, + config: Config, + network: Network, + initial_tip_height: Option, + ) -> DbFormatChangeThreadHandle { + // # Correctness + // + // Cancel handles must use try_send() to avoid blocking waiting for the format change + // thread to shut down. + let (cancel_handle, cancel_receiver) = mpsc::sync_channel(1); + + let span = Span::current(); + let join_handle = thread::spawn(move || { + span.in_scope(move || { + self.apply_format_change(config, network, initial_tip_height, cancel_receiver); + }) + }); + + DbFormatChangeThreadHandle { + join_handle: Arc::new(join_handle), + cancel_handle, + } + } + /// Apply this format change to the database. /// Format changes should be launched in an independent `std::thread`. + /// + /// If `cancel_receiver` gets a message, or its sender is dropped, + /// the format change stops running early. // // New format changes must be added to the *end* of this method. - pub fn apply_format_change(&self, config: &Config, network: Network) { + fn apply_format_change( + self, + config: Config, + network: Network, + initial_tip_height: Option, + cancel_receiver: mpsc::Receiver, + ) { if !self.is_upgrade() { // # Correctness // // At the start of a format downgrade, the database must be marked as partially or - // fully downgraded. - Self::mark_as_changed(config, network); + // fully downgraded. This lets newer Zebra versions know that some blocks with older + // formats have been added to the database. + Self::mark_as_changed(&config, network); // Older supported versions just assume they can read newer formats, // because they can't predict all changes a newer Zebra version could make. @@ -108,19 +175,42 @@ impl DbFormatChange { return; } - // # TODO: link to format upgrade instructions doc here + // # Correctness + // + // If the format change is outside RocksDb, put new code above this comment! + let Some(initial_tip_height) = initial_tip_height else { + // If the database is empty, then the RocksDb format doesn't need any changes. + Self::mark_as_changed(&config, network); + return; + }; + + // Example format change. + // + // TODO: link to format upgrade instructions doc here + let mut upgrade_height = Height(0); + + // Keep upgrading until the initial database has been upgraded, + // or this task is cancelled by a shutdown. + while upgrade_height <= initial_tip_height + && matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) + { + // TODO: Do one format upgrade step here + + upgrade_height = (upgrade_height + 1).expect("task exits before maximum height"); + } // # Correctness // // New code goes above this comment! // // Run the latest format upgrade code after the other upgrades are complete, - // but before marking the format as upgraded. + // but before marking the format as upgraded. The code should check `cancel_receiver` + // every time it runs its inner update loop. // At the end of a format upgrade, the database is marked as fully upgraded. // Upgrades can be run more than once if Zebra is restarted, so this is just a performance // optimisation. - Self::mark_as_changed(config, network); + Self::mark_as_changed(&config, network); } /// Mark the database as fully upgraded. @@ -136,3 +226,39 @@ impl DbFormatChange { .expect("unable to write database format version file to disk"); } } + +impl DbFormatChangeThreadHandle { + /// Cancel the running format change thread, if this is the last handle. + /// Returns true if it was actually cancelled. + pub fn cancel_if_needed(&self) -> bool { + // # Correctness + // + // Checking the strong count has a race condition, because two handles can be dropped at + // the same time. + // + // If cancelling the thread is important, the owner of the handle must call force_cancel(). + if Arc::strong_count(&self.join_handle) <= 1 { + self.force_cancel(); + return true; + } + + false + } + + /// Force the running format change thread to cancel, even if there are other handles. + pub fn force_cancel(&self) { + // There's nothing we can do about errors here. + // If the channel is disconnected, the task has exited. + // If it's full, it's already been cancelled. + let _ = self.cancel_handle.try_send(CancelFormatChange); + } +} + +impl Drop for DbFormatChangeThreadHandle { + fn drop(&mut self) { + // Only cancel the format change if the state service is shutting down. + self.cancel_if_needed(); + + // There's no point waiting for the task to finish here. + } +} diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 8b6e261050a..537126c0e9b 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -14,7 +14,14 @@ use std::path::Path; use zebra_chain::parameters::Network; use crate::{ - service::finalized_state::{disk_db::DiskDb, disk_format::block::MAX_ON_DISK_HEIGHT}, + config::{database_format_version_in_code, database_format_version_on_disk}, + service::finalized_state::{ + disk_db::DiskDb, + disk_format::{ + block::MAX_ON_DISK_HEIGHT, + upgrade::{DbFormatChange, DbFormatChangeThreadHandle}, + }, + }, Config, }; @@ -32,12 +39,20 @@ pub mod arbitrary; /// `rocksdb` allows concurrent writes through a shared reference, /// so database instances are cloneable. When the final clone is dropped, /// the database is closed. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug)] pub struct ZebraDb { // Owned State // // Everything contained in this state must be shared by all clones, or read-only. // + /// A handle to a running format change task, which cancels the task when dropped. + /// + /// # Concurrency + /// + /// This field should be dropped before the database field, so the format upgrade task is + /// cancelled before the database is dropped. This helps avoid some kinds of deadlocks. + format_change_handle: Option, + /// The inner low-level database wrapper for the RocksDB database. db: DiskDb, } @@ -46,12 +61,33 @@ impl ZebraDb { /// Opens or creates the database at `config.path` for `network`, /// and returns a shared high-level typed database wrapper. pub fn new(config: &Config, network: Network) -> ZebraDb { - let db = ZebraDb { + let running_version = database_format_version_in_code(); + let disk_version = database_format_version_on_disk(config, network) + .expect("unable to read database format version file"); + + // Log any format changes before opening the database, in case opening fails. + let format_change = DbFormatChange::new(running_version, disk_version); + + // Open the database and do initial checks. + let mut db = ZebraDb { + format_change_handle: None, db: DiskDb::new(config, network), }; db.check_max_on_disk_tip_height(); + let initial_tip_height = db.finalized_tip_height(); + + // Start any required format changes. + if let Some(format_change) = format_change { + // Launch the format change and install its handle in the database. + db.format_change_handle = Some(format_change.spawn_format_change( + config.clone(), + network, + initial_tip_height, + )); + } + db } @@ -68,6 +104,18 @@ impl ZebraDb { /// /// See [`DiskDb::shutdown`] for details. pub fn shutdown(&mut self, force: bool) { + // # Concurrency + // + // The format upgrade task should be cancelled before the database is flushed or shut down. + // This helps avoid some kinds of deadlocks. + // + // See also the correctness note in `DiskDb::shutdown()`. + if force || self.db.shared_database_owners() <= 1 { + if let Some(format_change_handle) = self.format_change_handle.as_ref() { + format_change_handle.force_cancel(); + } + } + self.check_max_on_disk_tip_height(); self.db.shutdown(force); @@ -93,3 +141,9 @@ impl ZebraDb { } } } + +impl Drop for ZebraDb { + fn drop(&mut self) { + self.shutdown(false); + } +} From 33b4b61a4557af1c00c316677f89828de83dfc61 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 20 Jun 2023 18:53:20 +1000 Subject: [PATCH 03/43] Add some TODOs and remove a redundant timer --- zebra-state/src/config.rs | 3 +++ zebra-state/src/service.rs | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 3d09aecdc8c..bf90e052cf2 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -175,6 +175,7 @@ impl Default for Config { } // Cleaning up old database versions +// TODO: put this in a different module? /// Spawns a task that checks if there are old database folders, /// and deletes them from the filesystem. @@ -292,6 +293,8 @@ fn parse_version_number(dir_name: &str) -> Option { .and_then(|version| version.parse().ok()) } +// TODO: move these to the format upgrade module + /// Returns the full semantic version of the currently running database format code. /// /// This is the version implemented by the Zebra code that's currently running, diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 897903ed4ab..73802f6b590 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -263,6 +263,7 @@ impl Drop for ReadStateService { // The read state service shares the state, // so dropping it should check if we can shut down. + // TODO: move this into a try_shutdown() method if let Some(block_write_task) = self.block_write_task.take() { if let Some(block_write_task_handle) = Arc::into_inner(block_write_task) { // We're the last database user, so we can tell it to shut down (blocking): @@ -280,6 +281,7 @@ impl Drop for ReadStateService { #[cfg(test)] debug!("waiting for the block write task to finish"); + // TODO: move this into a check_for_panics() method if let Err(thread_panic) = block_write_task_handle.join() { std::panic::resume_unwind(thread_panic); } else { @@ -343,9 +345,7 @@ impl StateService { .tip_block() .map(CheckpointVerifiedBlock::from) .map(ChainTipBlock::from); - timer.finish(module_path!(), line!(), "fetching database tip"); - let timer = CodeTimer::start(); let (chain_tip_sender, latest_chain_tip, chain_tip_change) = ChainTipSender::new(initial_tip, network); @@ -1161,6 +1161,8 @@ impl Service for ReadStateService { fn poll_ready(&mut self, _: &mut Context<'_>) -> Poll> { // Check for panics in the block write task + // + // TODO: move into a check_for_panics() method let block_write_task = self.block_write_task.take(); if let Some(block_write_task) = block_write_task { From 8339ddb8efb226ac153a62f62c860642834adbfc Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 21 Jun 2023 12:24:24 +1000 Subject: [PATCH 04/43] Regularly check for panics in the state upgrade task --- zebra-state/src/service.rs | 2 + .../finalized_state/disk_format/upgrade.rs | 74 ++++++++++++++++--- .../src/service/finalized_state/zebra_db.rs | 17 ++++- 3 files changed, 80 insertions(+), 13 deletions(-) diff --git a/zebra-state/src/service.rs b/zebra-state/src/service.rs index 73802f6b590..e3955b63e86 100644 --- a/zebra-state/src/service.rs +++ b/zebra-state/src/service.rs @@ -1179,6 +1179,8 @@ impl Service for ReadStateService { } } + self.db.check_for_panics(); + Poll::Ready(Ok(())) } diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 42fd80b98f7..f5f3dc6c38e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -2,6 +2,7 @@ use std::{ cmp::Ordering, + panic, sync::{mpsc, Arc}, thread::{self, JoinHandle}, }; @@ -51,7 +52,7 @@ pub struct DbFormatChangeThreadHandle { /// A handle that can wait for the running format change thread to finish. /// /// Panics from this thread are propagated into Zebra's state service. - join_handle: Arc>, + update_task: Option>>, /// A channel that tells the running format thread to finish early. cancel_handle: mpsc::SyncSender, @@ -133,16 +134,20 @@ impl DbFormatChange { let (cancel_handle, cancel_receiver) = mpsc::sync_channel(1); let span = Span::current(); - let join_handle = thread::spawn(move || { + let update_task = thread::spawn(move || { span.in_scope(move || { self.apply_format_change(config, network, initial_tip_height, cancel_receiver); }) }); - DbFormatChangeThreadHandle { - join_handle: Arc::new(join_handle), + let mut handle = DbFormatChangeThreadHandle { + update_task: Some(Arc::new(update_task)), cancel_handle, - } + }; + + handle.check_for_panics(); + + handle } /// Apply this format change to the database. @@ -237,9 +242,11 @@ impl DbFormatChangeThreadHandle { // the same time. // // If cancelling the thread is important, the owner of the handle must call force_cancel(). - if Arc::strong_count(&self.join_handle) <= 1 { - self.force_cancel(); - return true; + if let Some(update_task) = self.update_task.as_ref() { + if Arc::strong_count(update_task) <= 1 { + self.force_cancel(); + return true; + } } false @@ -252,13 +259,58 @@ impl DbFormatChangeThreadHandle { // If it's full, it's already been cancelled. let _ = self.cancel_handle.try_send(CancelFormatChange); } + + /// Check for panics in the code running in the spawned thread. + /// If the thread exited with a panic, resume that panic. + /// + /// This method should be called regularly, so that panics are detected as soon as possible. + pub fn check_for_panics(&mut self) { + let update_task = self.update_task.take(); + + if let Some(update_task) = update_task { + if update_task.is_finished() { + // We use into_inner() because it guarantees that exactly one of the tasks + // gets the JoinHandle. try_unwrap() lets us keep the JoinHandle, but it can also + // miss panics. + if let Some(update_task) = Arc::into_inner(update_task) { + // We are the last handle with a reference to this task, + // so we can propagate any panics + if let Err(thread_panic) = update_task.join() { + panic::resume_unwind(thread_panic); + } + } + } else { + // It hasn't finished, so we need to put it back + self.update_task = Some(update_task); + } + } + } + + /// Wait for the spawned thread to finish. If it exited with a panic, resume that panic. + /// + /// This method should be called during shutdown. + pub fn wait_for_panics(&mut self) { + if let Some(update_task) = self.update_task.take() { + // We use into_inner() because it guarantees that exactly one of the tasks + // gets the JoinHandle. See the comments in check_for_panics(). + if let Some(update_task) = Arc::into_inner(update_task) { + // We are the last handle with a reference to this task, + // so we can propagate any panics + if let Err(thread_panic) = update_task.join() { + panic::resume_unwind(thread_panic); + } + } + } + } } impl Drop for DbFormatChangeThreadHandle { fn drop(&mut self) { // Only cancel the format change if the state service is shutting down. - self.cancel_if_needed(); - - // There's no point waiting for the task to finish here. + if self.cancel_if_needed() { + self.wait_for_panics(); + } else { + self.check_for_panics(); + } } } diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 537126c0e9b..542a7e27653 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -96,6 +96,19 @@ impl ZebraDb { self.db.path() } + /// Check for panics in code running in spawned threads. + /// If a thread exited with a panic, resume that panic. + /// + /// This method should be called regularly, so that panics are detected as soon as possible. + pub fn check_for_panics(&mut self) { + if let Some(format_change_handle) = self.format_change_handle.as_mut() { + format_change_handle.check_for_panics(); + } + + // This check doesn't panic, but we want to check it regularly anyway. + self.check_max_on_disk_tip_height(); + } + /// Shut down the database, cleaning up background tasks and ephemeral data. /// /// If `force` is true, clean up regardless of any shared references. @@ -111,12 +124,12 @@ impl ZebraDb { // // See also the correctness note in `DiskDb::shutdown()`. if force || self.db.shared_database_owners() <= 1 { - if let Some(format_change_handle) = self.format_change_handle.as_ref() { + if let Some(format_change_handle) = self.format_change_handle.as_mut() { format_change_handle.force_cancel(); } } - self.check_max_on_disk_tip_height(); + self.check_for_panics(); self.db.shutdown(force); } From 376c8001d92a74707441a4b3f13b7f134e4b6669 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 26 Jun 2023 08:07:45 +1000 Subject: [PATCH 05/43] Only run example upgrade once, change version field names --- .../finalized_state/disk_format/upgrade.rs | 68 +++++++++++-------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index f5f3dc6c38e..0356070e179 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -19,21 +19,21 @@ use crate::{config::write_database_format_version_to_disk, Config}; /// The kind of database format change we're performing. #[derive(Clone, Debug, Eq, PartialEq)] pub enum DbFormatChange { - /// Upgrading the format from `disk_version` to `running_version`. + /// Upgrading the format from `older_disk_version` to `newer_running_version`. /// /// Until this upgrade is complete, the format is a mixture of both versions. Upgrade { - disk_version: Version, - running_version: Version, + older_disk_version: Version, + newer_running_version: Version, }, - /// Marking the format as downgraded from `disk_version` to `running_version`. + /// Marking the format as downgraded from `newer_disk_version` to `older_running_version`. /// - /// Until the state is upgraded to `disk_version` by a later Zebra version, - /// the format will be a mixture of both versions. + /// Until the state is upgraded to `newer_disk_version` by a Zebra version with that state + /// version (or greater), the format will be a mixture of both versions. Downgrade { - disk_version: Version, - running_version: Version, + newer_disk_version: Version, + older_running_version: Version, }, } @@ -88,8 +88,8 @@ impl DbFormatChange { ); Some(Upgrade { - disk_version, - running_version, + older_disk_version: disk_version, + newer_running_version: running_version, }) } Ordering::Greater => { @@ -100,15 +100,12 @@ impl DbFormatChange { ); Some(Downgrade { - disk_version, - running_version, + newer_disk_version: disk_version, + older_running_version: running_version, }) } Ordering::Equal => { - info!( - ?running_version, - "trying to open compatible database format" - ); + info!(?running_version, "trying to open matching database format"); None } @@ -116,6 +113,7 @@ impl DbFormatChange { } /// Returns true if this change is an upgrade. + #[allow(dead_code)] pub fn is_upgrade(&self) -> bool { matches!(self, Upgrade { .. }) } @@ -164,7 +162,9 @@ impl DbFormatChange { initial_tip_height: Option, cancel_receiver: mpsc::Receiver, ) { - if !self.is_upgrade() { + let Upgrade { newer_running_version: _, older_disk_version } = self else { + // If it's not an upgrade, it's a downgrade. + // # Correctness // // At the start of a format downgrade, the database must be marked as partially or @@ -178,9 +178,9 @@ impl DbFormatChange { // The resposibility of staying backwards-compatible is on the newer version. // We do this on a best-effort basis for versions that are still supported. return; - } + }; - // # Correctness + // # New Upgrades Sometimes Go Here // // If the format change is outside RocksDb, put new code above this comment! let Some(initial_tip_height) = initial_tip_height else { @@ -192,19 +192,31 @@ impl DbFormatChange { // Example format change. // // TODO: link to format upgrade instructions doc here - let mut upgrade_height = Height(0); - // Keep upgrading until the initial database has been upgraded, - // or this task is cancelled by a shutdown. - while upgrade_height <= initial_tip_height - && matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) - { - // TODO: Do one format upgrade step here + // Check if we need to do this upgrade. + let database_format_add_format_change_task = + Version::parse("25.0.2").expect("version constant is valid"); + + if older_disk_version < database_format_add_format_change_task { + let mut upgrade_height = Height(0); - upgrade_height = (upgrade_height + 1).expect("task exits before maximum height"); + // Go through every height from genesis to the tip of the old version. + // If the state was downgraded, some heights might already be upgraded. + // (Since the upgraded format is added to the tip, the database can switch between + // lower and higher versions at any block.) + // + // Keep upgrading until the initial database has been upgraded, + // or this task is cancelled by a shutdown. + while upgrade_height <= initial_tip_height + && matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) + { + // TODO: Do one format upgrade step here + + upgrade_height = (upgrade_height + 1).expect("task exits before maximum height"); + } } - // # Correctness + // # New Upgrades Usually Go Here // // New code goes above this comment! // From b05ce29becabe3c3218d4ee183cf2acf1bcc66e2 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 26 Jun 2023 08:08:55 +1000 Subject: [PATCH 06/43] Increment database format to 25.0.2: add format change task --- zebra-state/src/constants.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 011b4115eda..4d359a8c0ae 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -52,7 +52,7 @@ pub const DATABASE_FORMAT_MINOR_VERSION: u64 = 0; /// The database format patch version, incremented each time the on-disk database format has a /// significant format compatibility fix. -pub const DATABASE_FORMAT_PATCH_VERSION: u64 = 1; +pub const DATABASE_FORMAT_PATCH_VERSION: u64 = 2; /// The name of the file containing the minor and patch database versions. pub const DATABASE_FORMAT_VERSION_FILE_NAME: &str = "version"; From 45b232e14f252e01dbafd821b153b895e592819f Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 26 Jun 2023 08:20:34 +1000 Subject: [PATCH 07/43] Log the running and initial disk database format versions on startup --- zebra-state/src/constants.rs | 16 +++++++++------- zebra-state/src/lib.rs | 5 ++++- zebrad/src/application.rs | 28 ++++++++++++++++++++++++---- zebrad/tests/acceptance.rs | 2 +- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 4d359a8c0ae..bd60f3d6198 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -5,7 +5,7 @@ use regex::Regex; // For doc comment links #[allow(unused_imports)] -use crate::config::{database_format_version_in_code, database_format_version_on_disk}; +use crate::config::{self, Config}; pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY; @@ -37,9 +37,9 @@ pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1; /// - we previously added compatibility code, and /// - it's available in all supported Zebra versions. /// -/// Use [`database_format_version_in_code()`] or [`database_format_version_on_disk()`] -/// to get the full semantic format version. -pub const DATABASE_FORMAT_VERSION: u64 = 25; +/// Use [`config::database_format_version_in_code()`] or +/// [`config::database_format_version_on_disk()`] to get the full semantic format version. +pub(crate) const DATABASE_FORMAT_VERSION: u64 = 25; /// The database format minor version, incremented each time the on-disk database format has a /// significant data format change. @@ -48,14 +48,16 @@ pub const DATABASE_FORMAT_VERSION: u64 = 25; /// - adding new column families, /// - changing the format of a column family in a compatible way, or /// - breaking changes with compatibility code in all supported Zebra versions. -pub const DATABASE_FORMAT_MINOR_VERSION: u64 = 0; +pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 0; /// The database format patch version, incremented each time the on-disk database format has a /// significant format compatibility fix. -pub const DATABASE_FORMAT_PATCH_VERSION: u64 = 2; +pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 2; /// The name of the file containing the minor and patch database versions. -pub const DATABASE_FORMAT_VERSION_FILE_NAME: &str = "version"; +/// +/// Use [`Config::version_file_path()`] to get the path to this file. +pub(crate) const DATABASE_FORMAT_VERSION_FILE_NAME: &str = "version"; /// The maximum number of blocks to check for NU5 transactions, /// before we assume we are on a pre-NU5 legacy chain. diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index f75721c5d57..44a337b88da 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -29,7 +29,10 @@ mod service; #[cfg(test)] mod tests; -pub use config::{check_and_delete_old_databases, Config}; +pub use config::{ + check_and_delete_old_databases, database_format_version_in_code, + database_format_version_on_disk, Config, +}; pub use constants::MAX_BLOCK_REORG_HEIGHT; pub use error::{ BoxError, CloneError, CommitSemanticallyVerifiedError, DuplicateNullifierError, diff --git a/zebrad/src/application.rs b/zebrad/src/application.rs index d701bd80870..161719dbff7 100644 --- a/zebrad/src/application.rs +++ b/zebrad/src/application.rs @@ -12,7 +12,9 @@ use abscissa_core::{ use semver::{BuildMetadata, Version}; use zebra_network::constants::PORT_IN_USE_ERROR; -use zebra_state::constants::{DATABASE_FORMAT_VERSION, LOCK_FILE_ERROR}; +use zebra_state::{ + constants::LOCK_FILE_ERROR, database_format_version_in_code, database_format_version_on_disk, +}; use crate::{ commands::EntryPoint, @@ -260,13 +262,31 @@ impl Application for ZebradApp { // collect the common metadata for the issue URL and panic report, // skipping any env vars that aren't present + // reads state disk version file, doesn't open RocksDB database + let disk_db_version = + match database_format_version_on_disk(&config.state, config.network.network) { + Ok(Some(version)) => version.to_string(), + Ok(None) => "creating new database".to_string(), + Err(error) => { + let mut error = format!("error: {error:?}"); + error.truncate(100); + error + } + }; + let app_metadata = vec![ - // cargo or git tag + short commit + // build-time constant: cargo or git tag + short commit ("version", build_version().to_string()), // config ("Zcash network", config.network.network.to_string()), - // constants - ("state version", DATABASE_FORMAT_VERSION.to_string()), + // code constant + ( + "running state version", + database_format_version_in_code().to_string(), + ), + // state disk file, doesn't open database + ("initial disk state version", disk_db_version), + // build-time constant ("features", env!("VERGEN_CARGO_FEATURES").to_string()), ]; diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 57062d59877..be913d1e741 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2089,7 +2089,7 @@ fn zebra_state_conflict() -> Result<()> { dir_conflict_full.push("state"); dir_conflict_full.push(format!( "v{}", - zebra_state::constants::DATABASE_FORMAT_VERSION + zebra_state::database_format_version_in_code().major, )); dir_conflict_full.push(config.network.network.to_string().to_lowercase()); format!( From 1ee0ab45090507297aab75deadda8641964e2580 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 26 Jun 2023 09:23:04 +1000 Subject: [PATCH 08/43] Add initial disk and running state versions to cached state images in CI --- .github/workflows/deploy-gcp-tests.yml | 102 ++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 263f7e1e85b..b79bf059cd5 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1719,6 +1719,96 @@ jobs: echo "UPDATE_SUFFIX=$UPDATE_SUFFIX" >> "$GITHUB_ENV" echo "TIME_SUFFIX=$TIME_SUFFIX" >> "$GITHUB_ENV" + # Get the full initial and running database versions from the test logs. + # These versions are used as part of the disk description and labels. + # + # If these versions are missing from the logs, the job fails. + # + # Typically, the database versions are around line 20 in the logs.. + # But we check the first 1000 log lines, just in case the test harness recompiles all the + # dependencies before running the test. (This can happen if the cache is invalid.) + # + # Passes the versions to subsequent steps using the $INITIAL_DISK_DB_VERSION, + # $RUNNING_DB_VERSION, and $DB_VERSION_SUMMARY env variables. + - name: Get database versions from logs + run: | + INITIAL_DISK_DB_VERSION="" + RUNNING_DB_VERSION="" + DB_VERSION_SUMMARY="" + + DOCKER_LOGS=$( \ + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command=" \ + sudo docker logs ${{ inputs.test_id }} | head +1000 \ + ") + + # either a semantic version or "creating new database" + INITIAL_DISK_DB_VERSION=$( \ + echo "$DOCKER_LOGS" | \ + grep --extended-regexp --only-matching 'initial disk state version: [0-9a-z\. ]+' | \ + grep --extended-regexp --only-matching '[0-9a-z\. ]+' | \ + tail -1 || \ + [[ $? == 1 ]] \ + ) + + if [[ -z "$INITIAL_DISK_DB_VERSION" ]]; then + echo "Checked logs:" + echo "" + echo "$DOCKER_LOGS" + echo "" + echo "Missing initial disk database version in logs: $INITIAL_DISK_DB_VERSION" + # Fail the tests, because Zebra didn't log the initial disk database version, + # or the regex in this step is wrong. + false + fi + + if [[ "$INITIAL_DISK_DB_VERSION" = "creating new database" ]]; then + INITIAL_DISK_DB_VERSION="new" + else + INITIAL_DISK_DB_VERSION="v${INITIAL_DISK_DB_VERSION//./-}" + fi + + echo "Found initial disk database version in logs: $INITIAL_DISK_DB_VERSION" + echo "INITIAL_DISK_DB_VERSION=$INITIAL_DISK_DB_VERSION" >> "$INITIAL_DISK_DB_VERSION" + + RUNNING_DB_VERSION=$( \ + echo "$DOCKER_LOGS" | \ + grep --extended-regexp --only-matching 'running state version: [0-9\.]+' | \ + grep --extended-regexp --only-matching '[0-9\.]+' | \ + tail -1 || \ + [[ $? == 1 ]] \ + ) + + if [[ -z "$RUNNING_DB_VERSION" ]]; then + echo "Checked logs:" + echo "" + echo "$DOCKER_LOGS" + echo "" + echo "Missing running database version in logs: $RUNNING_DB_VERSION" + # Fail the tests, because Zebra didn't log the running database version, + # or the regex in this step is wrong. + false + fi + + RUNNING_DB_VERSION="v${RUNNING_DB_VERSION//./-}" + echo "Found running database version in logs: $RUNNING_DB_VERSION" + echo "RUNNING_DB_VERSION=$RUNNING_DB_VERSION" >> "$RUNNING_DB_VERSION" + + if [[ "$INITIAL_DISK_DB_VERSION" = "$RUNNING_DB_VERSION" ]]; then + DB_VERSION_SUMMARY="$RUNNING_DB_VERSION" + elif [[ "$INITIAL_DISK_DB_VERSION" = "new" ]]; then + DB_VERSION_SUMMARY="$RUNNING_DB_VERSION in new database" + else + DB_VERSION_SUMMARY="$INITIAL_DISK_DB_VERSION upgrading to $RUNNING_DB_VERSION" + fi + + echo "Summarised database versions from logs: $DB_VERSION_SUMMARY" + echo "DB_VERSION_SUMMARY=$DB_VERSION_SUMMARY" >> "$DB_VERSION_SUMMARY" + # Get the sync height from the test logs, which is later used as part of the # disk description and labels. # @@ -1728,7 +1818,7 @@ jobs: # # If the sync height is missing from the logs, the job fails. # - # Passes the sync height to subsequent steps using $SYNC_HEIGHT env variable. + # Passes the sync height to subsequent steps using the $SYNC_HEIGHT env variable. - name: Get sync height from logs run: | SYNC_HEIGHT="" @@ -1746,12 +1836,16 @@ jobs: SYNC_HEIGHT=$( \ echo "$DOCKER_LOGS" | \ grep --extended-regexp --only-matching '${{ inputs.height_grep_text }}[0-9]+' | \ - grep --extended-regexp --only-matching '[0-9]+' | \ + grep --extended-regexp --only-matching '[0-9]+' | \ tail -1 || \ [[ $? == 1 ]] \ ) if [[ -z "$SYNC_HEIGHT" ]]; then + echo "Checked logs:" + echo "" + echo "$DOCKER_LOGS" + echo "" echo "Missing sync height in logs: $SYNC_HEIGHT" # Fail the tests, because Zebra and lightwalletd didn't log their sync heights, # or the CI workflow sync height regex is wrong. @@ -1820,8 +1914,8 @@ jobs: --source-disk=${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \ --source-disk-zone=${{ vars.GCP_ZONE }} \ --storage-location=us \ - --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" \ - --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" + --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }} and database format ${{ env.DB_VERSION_SUMMARY }}" \ + --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${{ env.RUNNING_DB_VERSION }},initial-state-disk-version=${{ env.INITIAL_DISK_DB_VERSION }},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" else echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT" fi From c0c721552b1f5c6de2c7650d0d7a8e7b71aee022 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 09:19:33 +1000 Subject: [PATCH 09/43] Fix missing imports --- zebra-state/src/service/finalized_state/disk_db.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 28f7cb6416b..b61d6abdf5f 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -10,7 +10,13 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constants must //! be incremented each time the database format (column, serialization, etc) changes. -use std::{fmt::Debug, path::Path, sync::Arc}; +use std::{ + collections::{BTreeMap, HashMap}, + fmt::Debug, + ops::RangeBounds, + path::Path, + sync::Arc, +}; use itertools::Itertools; use rlimit::increase_nofile_limit; From 7fcef28fefe95bfc0b04e621ff4d8d29be327b7b Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 09:21:38 +1000 Subject: [PATCH 10/43] Fix typo in logs workflow command --- .github/workflows/deploy-gcp-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index b79bf059cd5..f58ab8cb4d4 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1743,7 +1743,7 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=" \ - sudo docker logs ${{ inputs.test_id }} | head +1000 \ + sudo docker logs ${{ inputs.test_id }} | head -1000 \ ") # either a semantic version or "creating new database" From 32d0e9c062b8f26288a21fd5937c0c39ee15de6c Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 11:13:15 +1000 Subject: [PATCH 11/43] Add a force_save_to_disk argument to the CI workflow --- .github/workflows/continous-integration-docker.yml | 11 +++++++++++ .github/workflows/deploy-gcp-tests.yml | 13 +++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.github/workflows/continous-integration-docker.yml b/.github/workflows/continous-integration-docker.yml index 902218ef266..c58b3bee082 100644 --- a/.github/workflows/continous-integration-docker.yml +++ b/.github/workflows/continous-integration-docker.yml @@ -35,6 +35,11 @@ on: default: false description: 'Just run a lightwalletd full sync and update tip disks' required: true + force_save_to_disk: + required: false + type: boolean + default: false + description: 'Force tests to always create a cached state disk, if they already create disks' no_cache: description: 'Disable the Docker cache for this build' required: false @@ -330,6 +335,7 @@ jobs: test_variables: '-e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1' needs_zebra_state: false saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_suffix: checkpoint height_grep_text: 'flushing database to disk .*height.*=.*Height.*\(' secrets: inherit @@ -391,6 +397,7 @@ jobs: is_long_test: true needs_zebra_state: false saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -430,6 +437,7 @@ jobs: needs_zebra_state: true # update the disk on every PR, to increase CI speed saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -502,6 +510,7 @@ jobs: is_long_test: true needs_zebra_state: false saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -545,6 +554,7 @@ jobs: # update the disk on every PR, to increase CI speed # we don't have a test-update-sync-testnet job, so we need to update the disk here saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -577,6 +587,7 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: true + force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} disk_prefix: lwd-cache disk_suffix: tip root_state_path: '/var/cache' diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index f58ab8cb4d4..451b295d2c8 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -79,7 +79,12 @@ on: saves_to_disk: required: true type: boolean - description: 'Does the test create a new cached state disk?' + description: 'Can this test create new or updated cached state disks?' + force_save_to_disk: + required: false + type: boolean + default: false + description: 'Force this test to create a new or updated cached state disk' app_name: required: false type: string @@ -1630,7 +1635,7 @@ jobs: # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one. # Normally, if a job is skipped, all the jobs that depend on it are also skipped. # So we need to override the default success() check to make this job run. - if: ${{ !cancelled() && !failure() && inputs.saves_to_disk }} + if: ${{ !cancelled() && !failure() && (inputs.saves_to_disk || inputs.force_save_to_disk) }} permissions: contents: 'read' id-token: 'write' @@ -1907,7 +1912,7 @@ jobs: - name: Create image from state disk run: | MINIMUM_UPDATE_HEIGHT=$((ORIGINAL_HEIGHT+CACHED_STATE_UPDATE_LIMIT)) - if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]]; then + if [[ -z "$UPDATE_SUFFIX" ]] || [[ "$SYNC_HEIGHT" -gt "$MINIMUM_UPDATE_HEIGHT" ]] || [[ "${{ inputs.force_save_to_disk }}" == "true" ]]; then gcloud compute images create \ "${{ inputs.disk_prefix }}-${SHORT_GITHUB_REF}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${NETWORK}-${{ inputs.disk_suffix }}${UPDATE_SUFFIX}-${TIME_SUFFIX}" \ --force \ @@ -1915,7 +1920,7 @@ jobs: --source-disk-zone=${{ vars.GCP_ZONE }} \ --storage-location=us \ --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }} and database format ${{ env.DB_VERSION_SUMMARY }}" \ - --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${{ env.RUNNING_DB_VERSION }},initial-state-disk-version=${{ env.INITIAL_DISK_DB_VERSION }},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" + --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${{ env.RUNNING_DB_VERSION }},initial-state-disk-version=${{ env.INITIAL_DISK_DB_VERSION }},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},force-save=${{ inputs.force_save_to_disk }},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" else echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT" fi From f961230e263350b2341e3c6cf947612fd6e30382 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 11:40:51 +1000 Subject: [PATCH 12/43] Move use_internet_connection into zebrad_config() --- zebrad/tests/common/launch.rs | 14 ++------------ zebrad/tests/common/test_type.rs | 31 +++++++++++++++++++++++++------ 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index 0ed4899b4f4..25213daf6bb 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -13,7 +13,6 @@ use std::{ }; use color_eyre::eyre::Result; -use indexmap::IndexSet; use tempfile::TempDir; use zebra_chain::parameters::Network; @@ -236,19 +235,10 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( // Get the zebrad config let mut config = test_type - .zebrad_config(test_name) + .zebrad_config(test_name, use_internet_connection) .expect("already checked config")?; - // TODO: move this into zebrad_config() config.network.network = network; - if !use_internet_connection { - config.network.initial_mainnet_peers = IndexSet::new(); - config.network.initial_testnet_peers = IndexSet::new(); - // Avoid re-using cached peers from disk when we're supposed to be a disconnected instance - config.network.cache_dir = CacheDir::disabled(); - - config.mempool.debug_enable_at_height = Some(0); - } let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); @@ -282,7 +272,7 @@ pub fn can_spawn_zebrad_for_rpc + std::fmt::Debug>( } // Check if we have any necessary cached states for the zebrad config - test_type.zebrad_config(test_name).is_some() + test_type.zebrad_config(test_name, true).is_some() } /// Panics if `$pred` is false, with an error report containing: diff --git a/zebrad/tests/common/test_type.rs b/zebrad/tests/common/test_type.rs index a94420160b9..90fc6f0c78b 100644 --- a/zebrad/tests/common/test_type.rs +++ b/zebrad/tests/common/test_type.rs @@ -2,6 +2,9 @@ use std::{env, path::PathBuf, time::Duration}; +use indexmap::IndexSet; + +use zebra_network::CacheDir; use zebra_test::{command::NO_MATCHES_REGEX_ITER, prelude::*}; use zebrad::config::ZebradConfig; @@ -154,7 +157,11 @@ impl TestType { /// /// Returns `None` if the test should be skipped, /// and `Some(Err(_))` if the config could not be created. - pub fn zebrad_config>(&self, test_name: S) -> Option> { + pub fn zebrad_config>( + &self, + test_name: S, + use_internet_connection: bool, + ) -> Option> { let config = if self.needs_zebra_rpc_server() { // This is what we recommend our users configure. random_known_rpc_port_config(true) @@ -177,23 +184,35 @@ impl TestType { config.rpc.parallel_cpu_threads = 0; } - if !self.needs_zebra_cached_state() { - return Some(Ok(config)); + if !use_internet_connection { + config.network.initial_mainnet_peers = IndexSet::new(); + config.network.initial_testnet_peers = IndexSet::new(); + // Avoid re-using cached peers from disk when we're supposed to be a disconnected instance + config.network.cache_dir = CacheDir::disabled(); + + // Activate the mempool immediately by default + config.mempool.debug_enable_at_height = Some(0); } + // Add a fake miner address for mining RPCs #[cfg(feature = "getblocktemplate-rpcs")] let _ = config.mining.miner_address.insert( zebra_chain::transparent::Address::from_script_hash(config.network.network, [0x7e; 20]), ); + if !self.needs_zebra_cached_state() { + return Some(Ok(config)); + } + + // If we have a cached state, update the config to use it let zebra_state_path = self.zebrad_state_path(test_name)?; + config.state.ephemeral = false; + config.state.cache_dir = zebra_state_path; + // And reset the concurrency to the default value config.sync.checkpoint_verify_concurrency_limit = zebrad::components::sync::DEFAULT_CHECKPOINT_CONCURRENCY_LIMIT; - config.state.ephemeral = false; - config.state.cache_dir = zebra_state_path; - Some(Ok(config)) } From 7f222aff6172e9583c68883bf2c3131783f77f4d Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 11:48:40 +1000 Subject: [PATCH 13/43] fastmod can_spawn_zebrad_for_rpc can_spawn_zebrad_for_test_type zebra* --- .../common/get_block_template_rpcs/get_block_template.rs | 4 ++-- .../tests/common/get_block_template_rpcs/get_peer_info.rs | 6 +++--- zebrad/tests/common/get_block_template_rpcs/submit_block.rs | 6 +++--- zebrad/tests/common/launch.rs | 4 ++-- zebrad/tests/common/lightwalletd/send_transaction_test.rs | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs b/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs index 78631f66bfb..689c51ce921 100644 --- a/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs +++ b/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs @@ -21,7 +21,7 @@ use zebra_rpc::methods::get_block_template_rpcs::{ }; use crate::common::{ - launch::{can_spawn_zebrad_for_rpc, spawn_zebrad_for_rpc}, + launch::{can_spawn_zebrad_for_test_type, spawn_zebrad_for_rpc}, sync::{check_sync_logs_until, MempoolBehavior, SYNC_FINISHED_REGEX}, test_type::TestType, }; @@ -66,7 +66,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_rpc(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type) { return Ok(()); } diff --git a/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs b/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs index 30dbe7db3d1..7d0518c00db 100644 --- a/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs +++ b/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs @@ -7,7 +7,7 @@ use zebra_node_services::rpc_client::RpcRequestClient; use zebra_rpc::methods::get_block_template_rpcs::types::peer_info::PeerInfo; use crate::common::{ - launch::{can_spawn_zebrad_for_rpc, spawn_zebrad_for_rpc}, + launch::{can_spawn_zebrad_for_test_type, spawn_zebrad_for_rpc}, test_type::TestType, }; @@ -21,7 +21,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_rpc(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type) { return Ok(()); } @@ -29,7 +29,7 @@ pub(crate) async fn run() -> Result<()> { let (mut zebrad, zebra_rpc_address) = spawn_zebrad_for_rpc(network, test_name, test_type, true)? - .expect("Already checked zebra state path with can_spawn_zebrad_for_rpc"); + .expect("Already checked zebra state path with can_spawn_zebrad_for_test_type"); let rpc_address = zebra_rpc_address.expect("getpeerinfo test must have RPC port"); diff --git a/zebrad/tests/common/get_block_template_rpcs/submit_block.rs b/zebrad/tests/common/get_block_template_rpcs/submit_block.rs index de034460c8d..adea5fe5a7c 100644 --- a/zebrad/tests/common/get_block_template_rpcs/submit_block.rs +++ b/zebrad/tests/common/get_block_template_rpcs/submit_block.rs @@ -15,7 +15,7 @@ use zebra_node_services::rpc_client::RpcRequestClient; use crate::common::{ cached_state::get_raw_future_blocks, - launch::{can_spawn_zebrad_for_rpc, spawn_zebrad_for_rpc}, + launch::{can_spawn_zebrad_for_test_type, spawn_zebrad_for_rpc}, test_type::TestType, }; @@ -31,7 +31,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_rpc(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type) { return Ok(()); } @@ -50,7 +50,7 @@ pub(crate) async fn run() -> Result<()> { let should_sync = false; let (mut zebrad, zebra_rpc_address) = spawn_zebrad_for_rpc(network, test_name, test_type, should_sync)? - .expect("Already checked zebra state path with can_spawn_zebrad_for_rpc"); + .expect("Already checked zebra state path with can_spawn_zebrad_for_test_type"); let rpc_address = zebra_rpc_address.expect("submitblock test must have RPC port"); diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index 25213daf6bb..baabcb2ccdd 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -229,7 +229,7 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( let test_name = test_name.as_ref(); // Skip the test unless the user specifically asked for it - if !can_spawn_zebrad_for_rpc(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type) { return Ok(None); } @@ -256,7 +256,7 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( /// Returns `true` if a zebrad test for `test_type` has everything it needs to run. #[tracing::instrument] -pub fn can_spawn_zebrad_for_rpc + std::fmt::Debug>( +pub fn can_spawn_zebrad_for_test_type + std::fmt::Debug>( test_name: S, test_type: TestType, ) -> bool { diff --git a/zebrad/tests/common/lightwalletd/send_transaction_test.rs b/zebrad/tests/common/lightwalletd/send_transaction_test.rs index c58c7b8386e..1ad46c33892 100644 --- a/zebrad/tests/common/lightwalletd/send_transaction_test.rs +++ b/zebrad/tests/common/lightwalletd/send_transaction_test.rs @@ -30,7 +30,7 @@ use zebrad::components::mempool::downloads::MAX_INBOUND_CONCURRENCY; use crate::common::{ cached_state::get_future_blocks, - launch::{can_spawn_zebrad_for_rpc, spawn_zebrad_for_rpc}, + launch::{can_spawn_zebrad_for_test_type, spawn_zebrad_for_rpc}, lightwalletd::{ can_spawn_lightwalletd_for_rpc, spawn_lightwalletd_for_rpc, sync::wait_for_zebrad_and_lightwalletd_sync, @@ -62,7 +62,7 @@ pub async fn run() -> Result<()> { let network = Mainnet; // Skip the test unless the user specifically asked for it - if !can_spawn_zebrad_for_rpc(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type) { return Ok(()); } From fc6e6e5ebe6399fc724b49d76eaaae8e7612379a Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 11:50:20 +1000 Subject: [PATCH 14/43] Add a spawn_zebrad_without_rpc() function --- zebrad/tests/common/launch.rs | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index baabcb2ccdd..371fb66ee48 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -254,6 +254,64 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( Ok(Some((zebrad, config.rpc.listen_addr))) } +/// Spawns a zebrad instance on `network` without RPCs or `lightwalletd`. +/// +/// If `use_cached_state` is `true`, then update the cached state to the tip. +/// Otherwise, just create an empty state. +/// +/// If `use_internet_connection` is `false` then spawn, but without any peers. +/// This prevents it from downloading blocks. Instead, use the `ZEBRA_CACHED_STATE_DIR` +/// environmental variable to provide an initial state to the zebrad instance. +/// +/// Returns: +/// - `Ok(Some(zebrad))` on success, +/// - `Ok(None)` if the test doesn't have the required network or cached state, and +/// - `Err(_)` if spawning zebrad fails. +#[tracing::instrument] +pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( + network: Network, + test_name: S, + use_cached_state: bool, + use_internet_connection: bool, +) -> Result>> { + use TestType::*; + + let test_name = test_name.as_ref(); + + let test_type = if use_cached_state { + UpdateZebraCachedStateNoRpc + } else { + LaunchWithEmptyState { + launches_lightwalletd: false, + } + }; + + // Skip the test unless the user specifically asked for it + if !can_spawn_zebrad_for_test_type(test_name, test_type) { + return Ok(None); + } + + // Get the zebrad config + let mut config = test_type + .zebrad_config(test_name, use_internet_connection) + .expect("already checked config")?; + + config.network.network = network; + + let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); + + // Writes a configuration that does not have RPC listen_addr set. + // If the state path env var is set, uses it in the config. + let zebrad = testdir()? + .with_exact_config(&config)? + .spawn_child(args!["start"])? + .bypass_test_capture(true) + .with_timeout(test_type.zebrad_timeout()) + .with_failure_regex_iter(zebrad_failure_messages, zebrad_ignore_messages); + + Ok(Some(zebrad)) +} + /// Returns `true` if a zebrad test for `test_type` has everything it needs to run. #[tracing::instrument] pub fn can_spawn_zebrad_for_test_type + std::fmt::Debug>( From 571fb5247b5e16d92e5dd53ce08cce341f75dc3d Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 11:54:46 +1000 Subject: [PATCH 15/43] Remove unused copy_state() test code --- zebrad/tests/common/cached_state.rs | 80 ----------------------------- zebrad/tests/common/sync.rs | 30 +---------- 2 files changed, 1 insertion(+), 109 deletions(-) diff --git a/zebrad/tests/common/cached_state.rs b/zebrad/tests/common/cached_state.rs index 432e7ae5322..284add1c0df 100644 --- a/zebrad/tests/common/cached_state.rs +++ b/zebrad/tests/common/cached_state.rs @@ -11,8 +11,6 @@ use std::{ }; use color_eyre::eyre::{eyre, Result}; -use tempfile::TempDir; -use tokio::fs; use tower::{util::BoxService, Service}; use zebra_chain::{ @@ -25,7 +23,6 @@ use zebra_node_services::rpc_client::RpcRequestClient; use zebra_state::{ChainTipChange, LatestChainTip, MAX_BLOCK_REORG_HEIGHT}; use crate::common::{ - config::testdir, launch::spawn_zebrad_for_rpc, sync::{check_sync_logs_until, MempoolBehavior, SYNC_FINISHED_REGEX}, test_type::TestType, @@ -78,83 +75,6 @@ pub async fn load_tip_height_from_state_directory( Ok(chain_tip_height) } -/// Recursively copy a chain state database directory into a new temporary directory. -pub async fn copy_state_directory(network: Network, source: impl AsRef) -> Result { - // Copy the database files for this state and network, excluding testnet and other state versions - let source = source.as_ref(); - let state_config = zebra_state::Config { - cache_dir: source.into(), - ..Default::default() - }; - let source_net_dir = state_config.db_path(network); - let source_net_dir = source_net_dir.as_path(); - let state_suffix = source_net_dir - .strip_prefix(source) - .expect("db_path() is a subdirectory"); - - let destination = testdir()?; - let destination_net_dir = destination.path().join(state_suffix); - - tracing::info!( - ?source, - ?source_net_dir, - ?state_suffix, - ?destination, - ?destination_net_dir, - "copying cached state files (this may take some time)...", - ); - - let mut remaining_directories = vec![PathBuf::from(source_net_dir)]; - - while let Some(directory) = remaining_directories.pop() { - let sub_directories = - copy_directory(&directory, source_net_dir, destination_net_dir.as_ref()).await?; - - remaining_directories.extend(sub_directories); - } - - Ok(destination) -} - -/// Copy the contents of a directory, and return the sub-directories it contains. -/// -/// Copies all files from the `directory` into the destination specified by the concatenation of -/// the `base_destination_path` and `directory` stripped of its `prefix`. -#[tracing::instrument] -async fn copy_directory( - directory: &Path, - prefix: &Path, - base_destination_path: &Path, -) -> Result> { - let mut sub_directories = Vec::new(); - let mut entries = fs::read_dir(directory).await?; - - let destination = - base_destination_path.join(directory.strip_prefix(prefix).expect("Invalid path prefix")); - - fs::create_dir_all(&destination).await?; - - while let Some(entry) = entries.next_entry().await? { - let entry_path = entry.path(); - let file_type = entry.file_type().await?; - - if file_type.is_file() { - let file_name = entry_path.file_name().expect("Missing file name"); - let destination_path = destination.join(file_name); - - fs::copy(&entry_path, destination_path).await?; - } else if file_type.is_dir() { - sub_directories.push(entry_path); - } else if file_type.is_symlink() { - unimplemented!("Symbolic link support is currently not necessary"); - } else { - panic!("Unknown file type"); - } - } - - Ok(sub_directories) -} - /// Accepts a network, test_type, test_name, and num_blocks (how many blocks past the finalized tip to try getting) /// /// Syncs zebra until the tip, gets some blocks near the tip, via getblock rpc calls, diff --git a/zebrad/tests/common/sync.rs b/zebrad/tests/common/sync.rs index d7bc91d352c..dd0a1390294 100644 --- a/zebrad/tests/common/sync.rs +++ b/zebrad/tests/common/sync.rs @@ -5,10 +5,7 @@ //! Test functions in this file will not be run. //! This file is only for test library code. -use std::{ - path::{Path, PathBuf}, - time::Duration, -}; +use std::{path::PathBuf, time::Duration}; use color_eyre::eyre::Result; use tempfile::TempDir; @@ -19,7 +16,6 @@ use zebrad::{components::sync, config::ZebradConfig}; use zebra_test::{args, prelude::*}; use super::{ - cached_state::copy_state_directory, config::{persistent_test_config, testdir}, launch::ZebradTestDirExt, }; @@ -341,30 +337,6 @@ pub fn check_sync_logs_until( Ok(zebrad) } -/// Runs a zebrad instance to synchronize the chain to the network tip. -/// -/// The zebrad instance is executed on a copy of the partially synchronized chain state. This copy -/// is returned afterwards, containing the fully synchronized chain state. -#[allow(dead_code)] -#[tracing::instrument] -pub async fn copy_state_and_perform_full_sync( - network: Network, - partial_sync_path: &Path, -) -> Result { - let fully_synced_path = copy_state_directory(network, &partial_sync_path).await?; - - sync_until( - Height::MAX, - network, - SYNC_FINISHED_REGEX, - FINISH_PARTIAL_SYNC_TIMEOUT, - fully_synced_path, - MempoolBehavior::ShouldAutomaticallyActivate, - true, - false, - ) -} - /// Returns a test config for caching Zebra's state up to the mandatory checkpoint. pub fn cached_mandatory_checkpoint_test_config() -> Result { let mut config = persistent_test_config()?; From c1c8ce02931dc54728e50bd0177aa8abbbd37e35 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 13:15:50 +1000 Subject: [PATCH 16/43] Assert that upgrades and downgrades happen with the correct versions --- zebra-state/src/config.rs | 21 ++- .../finalized_state/disk_format/upgrade.rs | 132 ++++++++++++++++-- 2 files changed, 135 insertions(+), 18 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index bf90e052cf2..69020061ff3 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -339,29 +339,40 @@ pub fn database_format_version_on_disk( ))) } -/// Writes the currently running semantic database version to the on-disk database. +/// Writes `changed_version` to the on-disk database after the format is changed. +/// (Or a new database is created.) /// /// # Correctness /// -/// This should only be called after all running format upgrades are complete. +/// This should only be called: +/// - after each format upgrade is complete, +/// - when creating a new database, or +/// - when an older Zebra version opens a newer database. /// /// # Concurrency /// /// This must only be called while RocksDB has an open database for `config`. /// Otherwise, multiple Zebra processes could write the version at the same time, /// corrupting the file. +/// +/// # Panics +/// +/// If the major versions do not match. (The format is incompatible.) pub fn write_database_format_version_to_disk( + changed_version: &Version, config: &Config, network: Network, ) -> Result<(), BoxError> { let version_path = config.version_file_path(network); // The major version is already in the directory path. - let version = format!( - "{}.{}", - DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION + assert_eq!( + changed_version.major, DATABASE_FORMAT_VERSION, + "tried to do in-place database format change to an incompatible version" ); + let version = format!("{}.{}", changed_version.minor, changed_version.patch); + // # Concurrency // // The caller handles locking for this file write. diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 0356070e179..fc6c3c9b1f2 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -14,7 +14,10 @@ use zebra_chain::{block::Height, parameters::Network}; use DbFormatChange::*; -use crate::{config::write_database_format_version_to_disk, Config}; +use crate::{ + config::write_database_format_version_to_disk, database_format_version_in_code, + database_format_version_on_disk, Config, +}; /// The kind of database format change we're performing. #[derive(Clone, Debug, Eq, PartialEq)] @@ -162,7 +165,11 @@ impl DbFormatChange { initial_tip_height: Option, cancel_receiver: mpsc::Receiver, ) { - let Upgrade { newer_running_version: _, older_disk_version } = self else { + let Upgrade { + newer_running_version, + older_disk_version, + } = self + else { // If it's not an upgrade, it's a downgrade. // # Correctness @@ -170,7 +177,7 @@ impl DbFormatChange { // At the start of a format downgrade, the database must be marked as partially or // fully downgraded. This lets newer Zebra versions know that some blocks with older // formats have been added to the database. - Self::mark_as_changed(&config, network); + Self::mark_as_downgraded(&config, network); // Older supported versions just assume they can read newer formats, // because they can't predict all changes a newer Zebra version could make. @@ -185,7 +192,13 @@ impl DbFormatChange { // If the format change is outside RocksDb, put new code above this comment! let Some(initial_tip_height) = initial_tip_height else { // If the database is empty, then the RocksDb format doesn't need any changes. - Self::mark_as_changed(&config, network); + info!( + ?newer_running_version, + ?older_disk_version, + "marking empty database as upgraded" + ); + + Self::mark_as_upgraded_to(&database_format_version_in_code(), &config, network); return; }; @@ -216,30 +229,123 @@ impl DbFormatChange { } } + // At the end of each format upgrade, the database is marked as upgraded to that version. + // Upgrades can be run more than once if Zebra is restarted, so this is just a performance + // optimisation. + info!( + ?initial_tip_height, + ?newer_running_version, + ?older_disk_version, + "marking database as upgraded" + ); + Self::mark_as_upgraded_to(&database_format_add_format_change_task, &config, network); + // # New Upgrades Usually Go Here // // New code goes above this comment! // // Run the latest format upgrade code after the other upgrades are complete, - // but before marking the format as upgraded. The code should check `cancel_receiver` + // then mark the format as upgraded. The code should check `cancel_receiver` // every time it runs its inner update loop. + } - // At the end of a format upgrade, the database is marked as fully upgraded. - // Upgrades can be run more than once if Zebra is restarted, so this is just a performance - // optimisation. - Self::mark_as_changed(&config, network); + /// Mark the database as upgraded to `format_upgrade_version`. + /// + /// This should be called when an older database is opened by an older Zebra version, + /// after each version upgrade is complete. + /// + /// # Concurrency + /// + /// The version must only be updated while RocksDB is holding the database + /// directory lock. This prevents multiple Zebra instances corrupting the version + /// file. + /// + /// # Panics + /// + /// If the format should not have been upgraded, because the running version is: + /// - older than the disk version (that's a downgrade) + /// - the same as to the disk version (no upgrade needed) + /// + /// If the format should not have been upgraded, because the format upgrade version is: + /// - older or the same as the disk version + /// (multiple upgrades to the same version are not allowed) + /// - greater than the running version (that's a logic bug) + fn mark_as_upgraded_to(format_upgrade_version: &Version, config: &Config, network: Network) { + let disk_version = database_format_version_on_disk(config, network) + .expect("unable to read database format version file") + .expect("tried to upgrade a newly created database"); + let running_version = database_format_version_in_code(); + + assert!( + running_version > disk_version, + "can't upgrade a database that is being opened by an older or the same Zebra version:\n\ + disk: {disk_version}\n\ + upgrade: {format_upgrade_version}\n\ + running: {running_version}" + ); + + assert!( + format_upgrade_version > &disk_version, + "can't upgrade a database that has already been upgraded, or is newer:\n\ + disk: {disk_version}\n\ + upgrade: {format_upgrade_version}\n\ + running: {running_version}" + ); + + assert!( + format_upgrade_version <= &running_version, + "can't upgrade to a newer version than the running Zebra version:\n\ + disk: {disk_version}\n\ + upgrade: {format_upgrade_version}\n\ + running: {running_version}" + ); + + info!( + ?running_version, + ?format_upgrade_version, + ?disk_version, + "marking database format as upgraded" + ); + + write_database_format_version_to_disk(format_upgrade_version, config, network) + .expect("unable to write database format version file to disk"); } - /// Mark the database as fully upgraded. - /// This should be called after database format is up-to-date. + /// Mark the database as downgraded to the running database version. + /// This should be called after a newer database is opened by an older Zebra version. /// /// # Concurrency /// /// The version must only be updated while RocksDB is holding the database /// directory lock. This prevents multiple Zebra instances corrupting the version /// file. - fn mark_as_changed(config: &Config, network: Network) { - write_database_format_version_to_disk(config, network) + /// + /// # Panics + /// + /// If the format should have been upgraded, because the running version is newer. + /// If the state is newly created, because the running version should be the same. + /// + /// Multiple downgrades are allowed, because they all downgrade to the same running version. + fn mark_as_downgraded(config: &Config, network: Network) { + let disk_version = database_format_version_on_disk(config, network) + .expect("unable to read database format version file") + .expect("can't downgrade a newly created database"); + let running_version = database_format_version_in_code(); + + assert!( + disk_version >= running_version, + "can't downgrade a database that is being opened by a newer Zebra version:\n\ + disk: {disk_version}\n\ + running: {running_version}" + ); + + info!( + ?running_version, + ?disk_version, + "marking database format as downgraded" + ); + + write_database_format_version_to_disk(&running_version, config, network) .expect("unable to write database format version file to disk"); } } From 306ff1bd03eda1d2c624307e0c6f3a841b7611db Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 13:16:40 +1000 Subject: [PATCH 17/43] Add a kill_and_return_output() method for tests --- zebra-test/src/command.rs | 87 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/zebra-test/src/command.rs b/zebra-test/src/command.rs index dae47defba3..41f7b04478e 100644 --- a/zebra-test/src/command.rs +++ b/zebra-test/src/command.rs @@ -540,7 +540,9 @@ impl TestChild { // Read unread child output. // // This checks for failure logs, and prevents some test hangs and deadlocks. - if self.child.is_some() || self.stdout.is_some() { + // + // TODO: this could block if stderr is full and stdout is waiting for stderr to be read. + if self.stdout.is_some() { let wrote_lines = self.wait_for_stdout_line(format!("\n{} Child Stdout:", self.command_path)); @@ -552,7 +554,7 @@ impl TestChild { } } - if self.child.is_some() || self.stderr.is_some() { + if self.stderr.is_some() { let wrote_lines = self.wait_for_stderr_line(format!("\n{} Child Stderr:", self.command_path)); @@ -566,6 +568,56 @@ impl TestChild { kill_result } + /// Kill the process, and return all its remaining standard output and standard error output. + /// + /// If `ignore_exited` is `true`, log "can't kill an exited process" errors, + /// rather than returning them. + /// + /// Returns `Ok(output)`, or an error if the kill failed. + pub fn kill_and_return_output(&mut self, ignore_exited: bool) -> Result { + self.apply_failure_regexes_to_outputs(); + + // Prevent a hang when consuming output, + // by making sure the child's output actually finishes. + let kill_result = self.kill(ignore_exited); + + // Read unread child output. + let mut stdout_buf = String::new(); + let mut stderr_buf = String::new(); + + // This also checks for failure logs, and prevents some test hangs and deadlocks. + loop { + let mut remaining_output = false; + + if let Some(stdout) = self.stdout.as_mut() { + if let Some(line) = + Self::wait_and_return_output_line(stdout, self.bypass_test_capture) + { + stdout_buf.push_str(&line); + remaining_output = true; + } + } + + if let Some(stderr) = self.stderr.as_mut() { + if let Some(line) = + Self::wait_and_return_output_line(stderr, self.bypass_test_capture) + { + stderr_buf.push_str(&line); + remaining_output = true; + } + } + + if !remaining_output { + break; + } + } + + let mut output = stdout_buf; + output.push_str(&stderr_buf); + + kill_result.map(|()| output) + } + /// Waits until a line of standard output is available, then consumes it. /// /// If there is a line, and `write_context` is `Some`, writes the context to the test logs. @@ -632,15 +684,40 @@ impl TestChild { false } + /// Waits until a line of `output` is available, then returns it. + /// + /// If there is a line, and `write_context` is `Some`, writes the context to the test logs. + /// Always writes the line to the test logs. + /// + /// Returns `true` if a line was available, + /// or `false` if the standard output has finished. + #[allow(clippy::unwrap_in_result)] + fn wait_and_return_output_line( + mut output: impl Iterator>, + bypass_test_capture: bool, + ) -> Option { + if let Some(line_result) = output.next() { + let line_result = line_result.expect("failure reading test process logs"); + + Self::write_to_test_logs(&line_result, bypass_test_capture); + + return Some(line_result); + } + + None + } + /// Waits for the child process to exit, then returns its output. /// + /// # Correctness + /// /// The other test child output methods take one or both outputs, /// making them unavailable to this method. /// /// Ignores any configured timeouts. /// - /// Returns an error if the child has already been taken, - /// or both outputs have already been taken. + /// Returns an error if the child has already been taken. + /// TODO: return an error if both outputs have already been taken. #[spandoc::spandoc] pub fn wait_with_output(mut self) -> Result> { let child = match self.child.take() { @@ -708,6 +785,8 @@ impl TestChild { /// /// Kills the child on error, or after the configured timeout has elapsed. /// See [`Self::expect_line_matching_regex_set`] for details. + // + // TODO: these methods could block if stderr is full and stdout is waiting for stderr to be read #[instrument(skip(self))] #[allow(clippy::unwrap_in_result)] pub fn expect_stdout_line_matches(&mut self, success_regex: R) -> Result From ae4def63c6fbbbbbeb5c3d0821dc6fa36d958b54 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 6 Jul 2023 13:16:56 +1000 Subject: [PATCH 18/43] Add a test for new_state_format() versions (no upgrades or downgrades) --- zebrad/tests/acceptance.rs | 47 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index be913d1e741..eb0cc8b8186 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -166,8 +166,8 @@ use common::{ config_file_full_path, configs_dir, default_test_config, persistent_test_config, testdir, }, launch::{ - spawn_zebrad_for_rpc, ZebradTestDirExt, BETWEEN_NODES_DELAY, EXTENDED_LAUNCH_DELAY, - LAUNCH_DELAY, + spawn_zebrad_for_rpc, spawn_zebrad_without_rpc, ZebradTestDirExt, BETWEEN_NODES_DELAY, + EXTENDED_LAUNCH_DELAY, LAUNCH_DELAY, }, lightwalletd::{can_spawn_lightwalletd_for_rpc, spawn_lightwalletd_for_rpc}, sync::{ @@ -2381,6 +2381,7 @@ fn end_of_support_is_checked_at_start() -> Result<()> { Ok(()) } + /// Test `zebra-checkpoints` on mainnet. /// /// If you want to run this test individually, see the module documentation. @@ -2403,3 +2404,45 @@ async fn generate_checkpoints_mainnet() -> Result<()> { async fn generate_checkpoints_testnet() -> Result<()> { common::checkpoints::run(Testnet).await } + +/// Check that new states are created with the current state format. +#[tokio::test] +pub(crate) async fn new_state_format() -> Result<()> { + let _init_guard = zebra_test::init(); + + let test_name = "get_peer_info_test"; + let network = Network::Mainnet; + + let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false)?; + + // Skip the test unless it has the required state and environmental variables. + let Some(mut zebrad) = zebrad else { + return Ok(()); + }; + + tracing::info!(?network, "running {} using zebrad", test_name); + + zebrad.expect_stdout_line_matches("creating new database with the current format")?; + let logs = zebrad.kill_and_return_output(false)?; + + assert!( + !logs.contains("marking database format as upgraded"), + "unexpected format upgrade in logs:\n\ + {logs}" + ); + assert!( + !logs.contains("marking database format as downgraded"), + "unexpected format downgrade in logs:\n\ + {logs}" + ); + + let output = zebrad.wait_with_output()?; + let output = output.assert_failure()?; + + // [Note on port conflict](#Note on port conflict) + output + .assert_was_killed() + .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + + Ok(()) +} From c5b476f3c31dc83487f95b35bc33bc1c4adc6314 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 05:44:05 +1000 Subject: [PATCH 19/43] Add use_internet_connection to can_spawn_zebrad_for_test_type() --- zebrad/tests/common/launch.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index 371fb66ee48..dd07d89da04 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -229,7 +229,7 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( let test_name = test_name.as_ref(); // Skip the test unless the user specifically asked for it - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, use_internet_connection) { return Ok(None); } @@ -287,7 +287,7 @@ pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( }; // Skip the test unless the user specifically asked for it - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, use_internet_connection) { return Ok(None); } @@ -317,8 +317,9 @@ pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( pub fn can_spawn_zebrad_for_test_type + std::fmt::Debug>( test_name: S, test_type: TestType, + use_internet_connection: bool, ) -> bool { - if zebra_test::net::zebra_skip_network_tests() { + if use_internet_connection && zebra_test::net::zebra_skip_network_tests() { return false; } From b1831dbc8140404c549f6d430ef659326241769c Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 05:46:43 +1000 Subject: [PATCH 20/43] Fix workflow parameter passing --- .github/workflows/continous-integration-docker.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/continous-integration-docker.yml b/.github/workflows/continous-integration-docker.yml index c58b3bee082..5454acba9a5 100644 --- a/.github/workflows/continous-integration-docker.yml +++ b/.github/workflows/continous-integration-docker.yml @@ -437,7 +437,7 @@ jobs: needs_zebra_state: true # update the disk on every PR, to increase CI speed saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -510,7 +510,7 @@ jobs: is_long_test: true needs_zebra_state: false saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk }} disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -554,7 +554,7 @@ jobs: # update the disk on every PR, to increase CI speed # we don't have a test-update-sync-testnet job, so we need to update the disk here saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -587,7 +587,7 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk }} disk_prefix: lwd-cache disk_suffix: tip root_state_path: '/var/cache' From 219c5b576037647a2726bd045af96f8d9a9554fa Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 06:01:07 +1000 Subject: [PATCH 21/43] Check that reopening a new database doesn't upgrade (or downgrade) the format --- .../finalized_state/disk_format/upgrade.rs | 2 +- zebra-test/src/command.rs | 5 +++ zebrad/tests/acceptance.rs | 44 +++++++++++++++++-- zebrad/tests/common/launch.rs | 24 +++++++--- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index fc6c3c9b1f2..2c29aed0a12 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -108,7 +108,7 @@ impl DbFormatChange { }) } Ordering::Equal => { - info!(?running_version, "trying to open matching database format"); + info!(?running_version, "trying to open current database format"); None } diff --git a/zebra-test/src/command.rs b/zebra-test/src/command.rs index 41f7b04478e..f39ebf47129 100644 --- a/zebra-test/src/command.rs +++ b/zebra-test/src/command.rs @@ -1372,6 +1372,11 @@ impl TestOutput { fn was_killed(&self) -> bool { self.output.status.signal() == Some(9) } + + /// Takes the generic `dir` paramter out of this `TestOutput`. + pub fn take_dir(&mut self) -> Option { + self.dir.take() + } } /// Add context to an error report diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index eb0cc8b8186..37a12c95777 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2405,15 +2405,18 @@ async fn generate_checkpoints_testnet() -> Result<()> { common::checkpoints::run(Testnet).await } -/// Check that new states are created with the current state format. +/// Check that new states are created with the current state format version, +/// and that restarting `zebrad` doesn't change the format version. #[tokio::test] pub(crate) async fn new_state_format() -> Result<()> { let _init_guard = zebra_test::init(); - let test_name = "get_peer_info_test"; + let test_name = "new_state_format_test/new"; let network = Network::Mainnet; - let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false)?; + // # Create a new state and check it has the current version + + let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, None)?; // Skip the test unless it has the required state and environmental variables. let Some(mut zebrad) = zebrad else { @@ -2436,6 +2439,41 @@ pub(crate) async fn new_state_format() -> Result<()> { {logs}" ); + let output = zebrad.wait_with_output()?; + let mut output = output.assert_failure()?; + + let dir = output + .take_dir() + .expect("dir should not already have been taken"); + + // [Note on port conflict](#Note on port conflict) + output + .assert_was_killed() + .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + + // # Reopen that state and check the version hasn't changed + + let test_name = "new_state_format_test/reopen"; + + let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir)? + .expect("unexpectedly missing required state or env vars"); + + tracing::info!(?network, "running {} using zebrad", test_name); + + zebrad.expect_stdout_line_matches("trying to open current database format")?; + let logs = zebrad.kill_and_return_output(false)?; + + assert!( + !logs.contains("marking database format as upgraded"), + "unexpected format upgrade in logs:\n\ + {logs}" + ); + assert!( + !logs.contains("marking database format as downgraded"), + "unexpected format downgrade in logs:\n\ + {logs}" + ); + let output = zebrad.wait_with_output()?; let output = output.assert_failure()?; diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index dd07d89da04..a5b5291f214 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -7,6 +7,7 @@ use std::{ env, + fmt::Debug, net::SocketAddr, path::{Path, PathBuf}, time::Duration, @@ -220,7 +221,7 @@ where /// /// `zebra_rpc_address` is `None` if the test type doesn't need an RPC port. #[tracing::instrument] -pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( +pub fn spawn_zebrad_for_rpc + Debug>( network: Network, test_name: S, test_type: TestType, @@ -263,17 +264,24 @@ pub fn spawn_zebrad_for_rpc + std::fmt::Debug>( /// This prevents it from downloading blocks. Instead, use the `ZEBRA_CACHED_STATE_DIR` /// environmental variable to provide an initial state to the zebrad instance. /// +/// If `reuse_state_path` is `Some(tempdir)`, use that directory as the state path. +/// /// Returns: /// - `Ok(Some(zebrad))` on success, /// - `Ok(None)` if the test doesn't have the required network or cached state, and /// - `Err(_)` if spawning zebrad fails. #[tracing::instrument] -pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( +pub fn spawn_zebrad_without_rpc( network: Network, - test_name: S, + test_name: Str, use_cached_state: bool, use_internet_connection: bool, -) -> Result>> { + reuse_state_path: Dir, +) -> Result>> +where + Str: AsRef + Debug, + Dir: Into> + Debug, +{ use TestType::*; let test_name = test_name.as_ref(); @@ -300,9 +308,13 @@ pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); + let testdir = reuse_state_path + .into() + .unwrap_or_else(|| testdir().expect("failed to create test temporary directory")); + // Writes a configuration that does not have RPC listen_addr set. // If the state path env var is set, uses it in the config. - let zebrad = testdir()? + let zebrad = testdir .with_exact_config(&config)? .spawn_child(args!["start"])? .bypass_test_capture(true) @@ -314,7 +326,7 @@ pub fn spawn_zebrad_without_rpc + std::fmt::Debug>( /// Returns `true` if a zebrad test for `test_type` has everything it needs to run. #[tracing::instrument] -pub fn can_spawn_zebrad_for_test_type + std::fmt::Debug>( +pub fn can_spawn_zebrad_for_test_type + Debug>( test_name: S, test_type: TestType, use_internet_connection: bool, From 29b7da31e723a105f6e4f5c247a9834d6a4a04c0 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 06:12:30 +1000 Subject: [PATCH 22/43] Allow ephemeral to be set to false even if we don't have a cached state --- zebrad/tests/acceptance.rs | 4 ++-- zebrad/tests/common/launch.rs | 10 ++++++---- zebrad/tests/common/test_type.rs | 8 ++++++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 37a12c95777..f150620f502 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2416,7 +2416,7 @@ pub(crate) async fn new_state_format() -> Result<()> { // # Create a new state and check it has the current version - let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, None)?; + let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, false, None)?; // Skip the test unless it has the required state and environmental variables. let Some(mut zebrad) = zebrad else { @@ -2455,7 +2455,7 @@ pub(crate) async fn new_state_format() -> Result<()> { let test_name = "new_state_format_test/reopen"; - let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir)? + let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, false, dir)? .expect("unexpectedly missing required state or env vars"); tracing::info!(?network, "running {} using zebrad", test_name); diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index a5b5291f214..f5dcda8b07c 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -236,7 +236,7 @@ pub fn spawn_zebrad_for_rpc + Debug>( // Get the zebrad config let mut config = test_type - .zebrad_config(test_name, use_internet_connection) + .zebrad_config(test_name, use_internet_connection, true) .expect("already checked config")?; config.network.network = network; @@ -276,6 +276,7 @@ pub fn spawn_zebrad_without_rpc( test_name: Str, use_cached_state: bool, use_internet_connection: bool, + ephemeral: bool, reuse_state_path: Dir, ) -> Result>> where @@ -301,7 +302,7 @@ where // Get the zebrad config let mut config = test_type - .zebrad_config(test_name, use_internet_connection) + .zebrad_config(test_name, use_internet_connection, ephemeral) .expect("already checked config")?; config.network.network = network; @@ -342,8 +343,9 @@ pub fn can_spawn_zebrad_for_test_type + Debug>( return false; } - // Check if we have any necessary cached states for the zebrad config - test_type.zebrad_config(test_name, true).is_some() + // Check if we have any necessary cached states for the zebrad config. + // The ephemeral value doesn't matter here. + test_type.zebrad_config(test_name, true, true).is_some() } /// Panics if `$pred` is false, with an error report containing: diff --git a/zebrad/tests/common/test_type.rs b/zebrad/tests/common/test_type.rs index 90fc6f0c78b..506955f44a5 100644 --- a/zebrad/tests/common/test_type.rs +++ b/zebrad/tests/common/test_type.rs @@ -155,12 +155,15 @@ impl TestType { /// Returns a Zebra config for this test. /// + /// `ephemeral` is ignored if the test is using a cached state. + /// /// Returns `None` if the test should be skipped, /// and `Some(Err(_))` if the config could not be created. - pub fn zebrad_config>( + pub fn zebrad_config>( &self, - test_name: S, + test_name: Str, use_internet_connection: bool, + ephemeral: bool, ) -> Option> { let config = if self.needs_zebra_rpc_server() { // This is what we recommend our users configure. @@ -201,6 +204,7 @@ impl TestType { ); if !self.needs_zebra_cached_state() { + config.state.ephemeral = ephemeral; return Some(Ok(config)); } From fb96389e0ef74e9b8a1e490112433fa1a3817554 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 06:24:03 +1000 Subject: [PATCH 23/43] Add a test type that will accept any kind of state --- zebrad/tests/common/launch.rs | 6 ++++-- zebrad/tests/common/test_type.rs | 26 +++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index f5dcda8b07c..90c12bf33fe 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -286,13 +286,16 @@ where use TestType::*; let test_name = test_name.as_ref(); + let reuse_state_path = reuse_state_path.into(); let test_type = if use_cached_state { UpdateZebraCachedStateNoRpc - } else { + } else if ephemeral || reuse_state_path.is_none() { LaunchWithEmptyState { launches_lightwalletd: false, } + } else { + UseAnyState }; // Skip the test unless the user specifically asked for it @@ -310,7 +313,6 @@ where let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); let testdir = reuse_state_path - .into() .unwrap_or_else(|| testdir().expect("failed to create test temporary directory")); // Writes a configuration that does not have RPC listen_addr set. diff --git a/zebrad/tests/common/test_type.rs b/zebrad/tests/common/test_type.rs index 506955f44a5..22c072c05cd 100644 --- a/zebrad/tests/common/test_type.rs +++ b/zebrad/tests/common/test_type.rs @@ -44,6 +44,9 @@ pub enum TestType { allow_lightwalletd_cached_state: bool, }, + /// Launch with a Zebra and lightwalletd state that might or might not be empty. + UseAnyState, + /// Sync to tip from a lightwalletd cached state. /// /// This test requires a cached Zebra and lightwalletd state. @@ -72,7 +75,7 @@ impl TestType { // - FullSyncFromGenesis, UpdateCachedState, UpdateZebraCachedStateNoRpc: // skip the test if it is not available match self { - LaunchWithEmptyState { .. } => false, + LaunchWithEmptyState { .. } | UseAnyState => false, FullSyncFromGenesis { .. } | UpdateCachedState | UpdateZebraCachedStateNoRpc @@ -84,16 +87,17 @@ impl TestType { pub fn needs_zebra_rpc_server(&self) -> bool { match self { UpdateZebraCachedStateWithRpc | LaunchWithEmptyState { .. } => true, - UpdateZebraCachedStateNoRpc | FullSyncFromGenesis { .. } | UpdateCachedState => { - self.launches_lightwalletd() - } + UseAnyState + | UpdateZebraCachedStateNoRpc + | FullSyncFromGenesis { .. } + | UpdateCachedState => self.launches_lightwalletd(), } } /// Does this test launch `lightwalletd`? pub fn launches_lightwalletd(&self) -> bool { match self { - UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => false, + UseAnyState | UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => false, FullSyncFromGenesis { .. } | UpdateCachedState => true, LaunchWithEmptyState { launches_lightwalletd, @@ -109,6 +113,7 @@ impl TestType { // - UpdateCachedState: skip the test if it is not available match self { LaunchWithEmptyState { .. } + | UseAnyState | FullSyncFromGenesis { .. } | UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => false, @@ -123,14 +128,17 @@ impl TestType { FullSyncFromGenesis { allow_lightwalletd_cached_state, } => *allow_lightwalletd_cached_state, - UpdateCachedState | UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => true, + UseAnyState + | UpdateCachedState + | UpdateZebraCachedStateNoRpc + | UpdateZebraCachedStateWithRpc => true, } } /// Can this test create a new `LIGHTWALLETD_DATA_DIR` cached state? pub fn can_create_lightwalletd_cached_state(&self) -> bool { match self { - LaunchWithEmptyState { .. } => false, + LaunchWithEmptyState { .. } | UseAnyState => false, FullSyncFromGenesis { .. } | UpdateCachedState => true, UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => false, } @@ -260,7 +268,7 @@ impl TestType { /// Returns the `zebrad` timeout for this test type. pub fn zebrad_timeout(&self) -> Duration { match self { - LaunchWithEmptyState { .. } => LIGHTWALLETD_DELAY, + LaunchWithEmptyState { .. } | UseAnyState => LIGHTWALLETD_DELAY, FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY, UpdateCachedState | UpdateZebraCachedStateNoRpc => LIGHTWALLETD_UPDATE_TIP_DELAY, UpdateZebraCachedStateWithRpc => FINISH_PARTIAL_SYNC_TIMEOUT, @@ -277,7 +285,7 @@ impl TestType { // We use the same timeouts for zebrad and lightwalletd, // because the tests check zebrad and lightwalletd concurrently. match self { - LaunchWithEmptyState { .. } => LIGHTWALLETD_DELAY, + LaunchWithEmptyState { .. } | UseAnyState => LIGHTWALLETD_DELAY, FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY, UpdateCachedState | UpdateZebraCachedStateNoRpc | UpdateZebraCachedStateWithRpc => { LIGHTWALLETD_UPDATE_TIP_DELAY From 3c4d1132fdc51d859690d73b2e71e75cfab778b4 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 11:31:43 +1000 Subject: [PATCH 24/43] When re-using a directory, configure the state test config with that path --- zebrad/tests/acceptance.rs | 4 ++-- zebrad/tests/common/launch.rs | 40 ++++++++++++++++++-------------- zebrad/tests/common/test_type.rs | 32 ++++++++++++++----------- 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index f150620f502..f6d56c21da0 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2416,7 +2416,7 @@ pub(crate) async fn new_state_format() -> Result<()> { // # Create a new state and check it has the current version - let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, false, None)?; + let zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, None, false)?; // Skip the test unless it has the required state and environmental variables. let Some(mut zebrad) = zebrad else { @@ -2455,7 +2455,7 @@ pub(crate) async fn new_state_format() -> Result<()> { let test_name = "new_state_format_test/reopen"; - let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, false, dir)? + let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? .expect("unexpectedly missing required state or env vars"); tracing::info!(?network, "running {} using zebrad", test_name); diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs index 90c12bf33fe..315a7e3cb6a 100644 --- a/zebrad/tests/common/launch.rs +++ b/zebrad/tests/common/launch.rs @@ -236,7 +236,7 @@ pub fn spawn_zebrad_for_rpc + Debug>( // Get the zebrad config let mut config = test_type - .zebrad_config(test_name, use_internet_connection, true) + .zebrad_config(test_name, use_internet_connection, None) .expect("already checked config")?; config.network.network = network; @@ -258,14 +258,15 @@ pub fn spawn_zebrad_for_rpc + Debug>( /// Spawns a zebrad instance on `network` without RPCs or `lightwalletd`. /// /// If `use_cached_state` is `true`, then update the cached state to the tip. -/// Otherwise, just create an empty state. +/// If `ephemeral` is `true`, then use an ephemeral state path. +/// If `reuse_state_path` is `Some(path)`, then use the state at that path, and take ownership of +/// the temporary directory, so it isn't deleted until the test ends. +/// Otherwise, just create an empty state in this test's new temporary directory. /// /// If `use_internet_connection` is `false` then spawn, but without any peers. /// This prevents it from downloading blocks. Instead, use the `ZEBRA_CACHED_STATE_DIR` /// environmental variable to provide an initial state to the zebrad instance. /// -/// If `reuse_state_path` is `Some(tempdir)`, use that directory as the state path. -/// /// Returns: /// - `Ok(Some(zebrad))` on success, /// - `Ok(None)` if the test doesn't have the required network or cached state, and @@ -275,9 +276,9 @@ pub fn spawn_zebrad_without_rpc( network: Network, test_name: Str, use_cached_state: bool, - use_internet_connection: bool, ephemeral: bool, reuse_state_path: Dir, + use_internet_connection: bool, ) -> Result>> where Str: AsRef + Debug, @@ -286,16 +287,22 @@ where use TestType::*; let test_name = test_name.as_ref(); + let reuse_state_path = reuse_state_path.into(); + let testdir = reuse_state_path + .unwrap_or_else(|| testdir().expect("failed to create test temporary directory")); - let test_type = if use_cached_state { - UpdateZebraCachedStateNoRpc - } else if ephemeral || reuse_state_path.is_none() { - LaunchWithEmptyState { - launches_lightwalletd: false, - } + let (test_type, replace_cache_dir) = if use_cached_state { + (UpdateZebraCachedStateNoRpc, None) + } else if ephemeral { + ( + LaunchWithEmptyState { + launches_lightwalletd: false, + }, + None, + ) } else { - UseAnyState + (UseAnyState, Some(testdir.path())) }; // Skip the test unless the user specifically asked for it @@ -305,16 +312,13 @@ where // Get the zebrad config let mut config = test_type - .zebrad_config(test_name, use_internet_connection, ephemeral) + .zebrad_config(test_name, use_internet_connection, replace_cache_dir) .expect("already checked config")?; config.network.network = network; let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages(); - let testdir = reuse_state_path - .unwrap_or_else(|| testdir().expect("failed to create test temporary directory")); - // Writes a configuration that does not have RPC listen_addr set. // If the state path env var is set, uses it in the config. let zebrad = testdir @@ -346,8 +350,8 @@ pub fn can_spawn_zebrad_for_test_type + Debug>( } // Check if we have any necessary cached states for the zebrad config. - // The ephemeral value doesn't matter here. - test_type.zebrad_config(test_name, true, true).is_some() + // The cache_dir value doesn't matter here. + test_type.zebrad_config(test_name, true, None).is_some() } /// Panics if `$pred` is false, with an error report containing: diff --git a/zebrad/tests/common/test_type.rs b/zebrad/tests/common/test_type.rs index 22c072c05cd..adb5fd4b897 100644 --- a/zebrad/tests/common/test_type.rs +++ b/zebrad/tests/common/test_type.rs @@ -1,6 +1,10 @@ //! Provides TestType enum with shared code for acceptance tests -use std::{env, path::PathBuf, time::Duration}; +use std::{ + env, + path::{Path, PathBuf}, + time::Duration, +}; use indexmap::IndexSet; @@ -163,7 +167,7 @@ impl TestType { /// Returns a Zebra config for this test. /// - /// `ephemeral` is ignored if the test is using a cached state. + /// `replace_cache_dir` replaces any cached or ephemeral state. /// /// Returns `None` if the test should be skipped, /// and `Some(Err(_))` if the config could not be created. @@ -171,7 +175,7 @@ impl TestType { &self, test_name: Str, use_internet_connection: bool, - ephemeral: bool, + replace_cache_dir: Option<&Path>, ) -> Option> { let config = if self.needs_zebra_rpc_server() { // This is what we recommend our users configure. @@ -211,19 +215,19 @@ impl TestType { zebra_chain::transparent::Address::from_script_hash(config.network.network, [0x7e; 20]), ); - if !self.needs_zebra_cached_state() { - config.state.ephemeral = ephemeral; - return Some(Ok(config)); - } + // If we have a cached state, or we don't want to be ephemeral, update the config to use it + if replace_cache_dir.is_some() || self.needs_zebra_cached_state() { + let zebra_state_path = replace_cache_dir + .map(|path| path.to_owned()) + .or_else(|| self.zebrad_state_path(test_name))?; - // If we have a cached state, update the config to use it - let zebra_state_path = self.zebrad_state_path(test_name)?; - config.state.ephemeral = false; - config.state.cache_dir = zebra_state_path; + config.state.ephemeral = false; + config.state.cache_dir = zebra_state_path; - // And reset the concurrency to the default value - config.sync.checkpoint_verify_concurrency_limit = - zebrad::components::sync::DEFAULT_CHECKPOINT_CONCURRENCY_LIMIT; + // And reset the concurrency to the default value + config.sync.checkpoint_verify_concurrency_limit = + zebrad::components::sync::DEFAULT_CHECKPOINT_CONCURRENCY_LIMIT; + } Some(Ok(config)) } From bf0550ffda4eacb21d27be8db909860d10e8bafc Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 12:17:29 +1000 Subject: [PATCH 25/43] Actually mark newly created databases with their format versions --- .../finalized_state/disk_format/upgrade.rs | 102 ++++++++++++++---- 1 file changed, 84 insertions(+), 18 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 2c29aed0a12..1c83c527c8e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -22,6 +22,11 @@ use crate::{ /// The kind of database format change we're performing. #[derive(Clone, Debug, Eq, PartialEq)] pub enum DbFormatChange { + /// Marking the format as newly created by `running_version`. + /// + /// Newly created databases have no disk version. + NewlyCreated { running_version: Version }, + /// Upgrading the format from `older_disk_version` to `newer_running_version`. /// /// Until this upgrade is complete, the format is a mixture of both versions. @@ -79,7 +84,7 @@ impl DbFormatChange { "creating new database with the current format" ); - return None; + return Some(NewlyCreated { running_version }); }; match disk_version.cmp(&running_version) { @@ -154,11 +159,51 @@ impl DbFormatChange { /// Apply this format change to the database. /// Format changes should be launched in an independent `std::thread`. /// + /// See `apply_format_upgrade()` for details. + fn apply_format_change( + self, + config: Config, + network: Network, + initial_tip_height: Option, + cancel_receiver: mpsc::Receiver, + ) { + match self { + // Handled in the rest of this function. + Upgrade { .. } => { + self.apply_format_upgrade(config, network, initial_tip_height, cancel_receiver) + } + + NewlyCreated { .. } => { + Self::mark_as_newly_created(&config, network); + + return; + } + Downgrade { .. } => { + // # Correctness + // + // At the start of a format downgrade, the database must be marked as partially or + // fully downgraded. This lets newer Zebra versions know that some blocks with older + // formats have been added to the database. + Self::mark_as_downgraded(&config, network); + + // Older supported versions just assume they can read newer formats, + // because they can't predict all changes a newer Zebra version could make. + // + // The resposibility of staying backwards-compatible is on the newer version. + // We do this on a best-effort basis for versions that are still supported. + return; + } + } + } + + /// Apply any required format updates to the database. + /// Format changes should be launched in an independent `std::thread`. + /// /// If `cancel_receiver` gets a message, or its sender is dropped, /// the format change stops running early. // - // New format changes must be added to the *end* of this method. - fn apply_format_change( + // New format upgrades must be added to the *end* of this method. + fn apply_format_upgrade( self, config: Config, network: Network, @@ -170,21 +215,7 @@ impl DbFormatChange { older_disk_version, } = self else { - // If it's not an upgrade, it's a downgrade. - - // # Correctness - // - // At the start of a format downgrade, the database must be marked as partially or - // fully downgraded. This lets newer Zebra versions know that some blocks with older - // formats have been added to the database. - Self::mark_as_downgraded(&config, network); - - // Older supported versions just assume they can read newer formats, - // because they can't predict all changes a newer Zebra version could make. - // - // The resposibility of staying backwards-compatible is on the newer version. - // We do this on a best-effort basis for versions that are still supported. - return; + unreachable!("already checked for Upgrade") }; // # New Upgrades Sometimes Go Here @@ -249,6 +280,41 @@ impl DbFormatChange { // every time it runs its inner update loop. } + /// Mark a newly created database with the current format version. + /// + /// This should be called when a newly created database is opened. + /// + /// # Concurrency + /// + /// The version must only be updated while RocksDB is holding the database + /// directory lock. This prevents multiple Zebra instances corrupting the version + /// file. + /// + /// # Panics + /// + /// If the format should not have been upgraded, because the database is not newly created. + fn mark_as_newly_created(config: &Config, network: Network) { + let disk_version = database_format_version_on_disk(config, network) + .expect("unable to read database format version file path"); + let running_version = database_format_version_in_code(); + + assert_eq!( + disk_version, None, + "can't overwrite the format version in an existing database:\n\ + disk: {disk_version:?}\n\ + running: {running_version}" + ); + + info!( + ?running_version, + ?disk_version, + "marking database format as newly created" + ); + + write_database_format_version_to_disk(&running_version, config, network) + .expect("unable to write database format version file to disk"); + } + /// Mark the database as upgraded to `format_upgrade_version`. /// /// This should be called when an older database is opened by an older Zebra version, From 6d251b0b7517cb9fc4630d66d7e20ed3ccfee454 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 12:17:55 +1000 Subject: [PATCH 26/43] Wait for the state to be opened before testing the format --- zebrad/tests/acceptance.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index f6d56c21da0..88de2798edc 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2426,6 +2426,8 @@ pub(crate) async fn new_state_format() -> Result<()> { tracing::info!(?network, "running {} using zebrad", test_name); zebrad.expect_stdout_line_matches("creating new database with the current format")?; + zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + let logs = zebrad.kill_and_return_output(false)?; assert!( @@ -2461,6 +2463,8 @@ pub(crate) async fn new_state_format() -> Result<()> { tracing::info!(?network, "running {} using zebrad", test_name); zebrad.expect_stdout_line_matches("trying to open current database format")?; + zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + let logs = zebrad.kill_and_return_output(false)?; assert!( From 1176b4d38dc47d136f6d5b1bec9b4cb6e0704f44 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 12:26:40 +1000 Subject: [PATCH 27/43] Run state format tests on mainnet and testnet configs (no network access) --- zebrad/tests/acceptance.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 88de2798edc..74038323ea9 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2408,11 +2408,18 @@ async fn generate_checkpoints_testnet() -> Result<()> { /// Check that new states are created with the current state format version, /// and that restarting `zebrad` doesn't change the format version. #[tokio::test] -pub(crate) async fn new_state_format() -> Result<()> { +async fn new_state_format() -> Result<()> { + for network in [Mainnet, Testnet] { + state_format_test("new_state_format_test", network).await?; + } + + Ok(()) +} + +async fn state_format_test(base_test_name: &str, network: Network) -> Result<()> { let _init_guard = zebra_test::init(); - let test_name = "new_state_format_test/new"; - let network = Network::Mainnet; + let test_name = &format!("{base_test_name}/new"); // # Create a new state and check it has the current version @@ -2423,7 +2430,7 @@ pub(crate) async fn new_state_format() -> Result<()> { return Ok(()); }; - tracing::info!(?network, "running {} using zebrad", test_name); + tracing::info!(?network, "running {test_name} using zebrad"); zebrad.expect_stdout_line_matches("creating new database with the current format")?; zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; @@ -2455,12 +2462,12 @@ pub(crate) async fn new_state_format() -> Result<()> { // # Reopen that state and check the version hasn't changed - let test_name = "new_state_format_test/reopen"; + let test_name = &format!("{base_test_name}/reopen"); let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? .expect("unexpectedly missing required state or env vars"); - tracing::info!(?network, "running {} using zebrad", test_name); + tracing::info!(?network, "running {test_name} using zebrad"); zebrad.expect_stdout_line_matches("trying to open current database format")?; zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; From f302f553bf1236f67bc7470db4a1556eebdd1620 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 12:29:48 +1000 Subject: [PATCH 28/43] run multiple reopens in tests --- zebrad/tests/acceptance.rs | 57 ++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 74038323ea9..3e17cb43daf 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2410,13 +2410,17 @@ async fn generate_checkpoints_testnet() -> Result<()> { #[tokio::test] async fn new_state_format() -> Result<()> { for network in [Mainnet, Testnet] { - state_format_test("new_state_format_test", network).await?; + state_format_test("new_state_format_test", network, 2).await?; } Ok(()) } -async fn state_format_test(base_test_name: &str, network: Network) -> Result<()> { +async fn state_format_test( + base_test_name: &str, + network: Network, + reopen_count: usize, +) -> Result<()> { let _init_guard = zebra_test::init(); let test_name = &format!("{base_test_name}/new"); @@ -2451,7 +2455,7 @@ async fn state_format_test(base_test_name: &str, network: Network) -> Result<()> let output = zebrad.wait_with_output()?; let mut output = output.assert_failure()?; - let dir = output + let mut dir = output .take_dir() .expect("dir should not already have been taken"); @@ -2462,36 +2466,41 @@ async fn state_format_test(base_test_name: &str, network: Network) -> Result<()> // # Reopen that state and check the version hasn't changed - let test_name = &format!("{base_test_name}/reopen"); + for _ in 0..reopen_count { + let test_name = &format!("{base_test_name}/reopen/{reopen_count}"); - let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? - .expect("unexpectedly missing required state or env vars"); + let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? + .expect("unexpectedly missing required state or env vars"); - tracing::info!(?network, "running {test_name} using zebrad"); + tracing::info!(?network, "running {test_name} using zebrad"); - zebrad.expect_stdout_line_matches("trying to open current database format")?; - zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + zebrad.expect_stdout_line_matches("trying to open current database format")?; + zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; - let logs = zebrad.kill_and_return_output(false)?; + let logs = zebrad.kill_and_return_output(false)?; - assert!( - !logs.contains("marking database format as upgraded"), - "unexpected format upgrade in logs:\n\ + assert!( + !logs.contains("marking database format as upgraded"), + "unexpected format upgrade in logs:\n\ {logs}" - ); - assert!( - !logs.contains("marking database format as downgraded"), - "unexpected format downgrade in logs:\n\ + ); + assert!( + !logs.contains("marking database format as downgraded"), + "unexpected format downgrade in logs:\n\ {logs}" - ); + ); - let output = zebrad.wait_with_output()?; - let output = output.assert_failure()?; + let output = zebrad.wait_with_output()?; + let mut output = output.assert_failure()?; - // [Note on port conflict](#Note on port conflict) - output - .assert_was_killed() - .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + dir = output + .take_dir() + .expect("dir should not already have been taken"); + // [Note on port conflict](#Note on port conflict) + output + .assert_was_killed() + .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + } Ok(()) } From 8137d37e9133e49f3e8953122d6bf6ea70ef2f75 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 12:53:27 +1000 Subject: [PATCH 29/43] Test upgrades run correctly --- zebra-state/src/lib.rs | 3 + .../finalized_state/disk_format/upgrade.rs | 17 ++++- zebrad/tests/acceptance.rs | 65 +++++++++++++++---- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index 44a337b88da..eedb90d1328 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -60,4 +60,7 @@ pub use service::{ init_test, init_test_services, ReadStateService, }; +#[cfg(any(test, feature = "proptest-impl"))] +pub use config::write_database_format_version_to_disk; + pub(crate) use request::ContextuallyVerifiedBlock; diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 1c83c527c8e..1b16d02e327 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -175,8 +175,6 @@ impl DbFormatChange { NewlyCreated { .. } => { Self::mark_as_newly_created(&config, network); - - return; } Downgrade { .. } => { // # Correctness @@ -191,7 +189,6 @@ impl DbFormatChange { // // The resposibility of staying backwards-compatible is on the newer version. // We do this on a best-effort basis for versions that are still supported. - return; } } } @@ -230,6 +227,14 @@ impl DbFormatChange { ); Self::mark_as_upgraded_to(&database_format_version_in_code(), &config, network); + + info!( + ?initial_tip_height, + ?newer_running_version, + ?older_disk_version, + "database is fully upgraded" + ); + return; }; @@ -278,6 +283,12 @@ impl DbFormatChange { // Run the latest format upgrade code after the other upgrades are complete, // then mark the format as upgraded. The code should check `cancel_receiver` // every time it runs its inner update loop. + info!( + ?initial_tip_height, + ?newer_running_version, + ?older_disk_version, + "database is fully upgraded" + ); } /// Mark a newly created database with the current format version. diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 3e17cb43daf..4ad2a0ea6d6 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -146,6 +146,7 @@ use color_eyre::{ eyre::{eyre, Result, WrapErr}, Help, }; +use semver::Version; use zebra_chain::{ block::{self, Height}, @@ -153,7 +154,7 @@ use zebra_chain::{ }; use zebra_network::constants::PORT_IN_USE_ERROR; use zebra_node_services::rpc_client::RpcRequestClient; -use zebra_state::constants::LOCK_FILE_ERROR; +use zebra_state::{constants::LOCK_FILE_ERROR, database_format_version_in_code}; use zebra_test::{args, command::ContextFrom, net::random_known_port, prelude::*}; @@ -2410,16 +2411,33 @@ async fn generate_checkpoints_testnet() -> Result<()> { #[tokio::test] async fn new_state_format() -> Result<()> { for network in [Mainnet, Testnet] { - state_format_test("new_state_format_test", network, 2).await?; + state_format_test("new_state_format_test", network, 2, None).await?; } Ok(()) } +/// Check that outdated states are updated to the current state format version, +/// and that restarting `zebrad` doesn't change the updated format version. +#[tokio::test] +async fn update_state_format() -> Result<()> { + let mut fake_version = database_format_version_in_code(); + fake_version.minor = 0; + fake_version.patch = 0; + + for network in [Mainnet, Testnet] { + state_format_test("update_state_format_test", network, 3, Some(&fake_version)).await?; + } + + Ok(()) +} + +/// Test state format changes, see calling tests for details. async fn state_format_test( base_test_name: &str, network: Network, reopen_count: usize, + fake_version: Option<&Version>, ) -> Result<()> { let _init_guard = zebra_test::init(); @@ -2464,26 +2482,51 @@ async fn state_format_test( .assert_was_killed() .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; + // # Apply the fake version if needed + + if let Some(fake_version) = fake_version { + let test_name = &format!("{base_test_name}/apply_fake_version"); + tracing::info!(?network, "running {test_name} using zebra-state"); + + let mut config = UseAnyState + .zebrad_config(test_name, false, Some(dir.path())) + .expect("already checked config")?; + config.network.network = network; + + zebra_state::write_database_format_version_to_disk(fake_version, &config.state, network) + .expect("can't write fake database version to disk"); + } + // # Reopen that state and check the version hasn't changed - for _ in 0..reopen_count { - let test_name = &format!("{base_test_name}/reopen/{reopen_count}"); + for reopened in 0..reopen_count { + let expect_older_version = fake_version.is_some() && reopened == 0; + let test_name = &format!("{base_test_name}/reopen/{reopened}"); let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? .expect("unexpectedly missing required state or env vars"); tracing::info!(?network, "running {test_name} using zebrad"); - zebrad.expect_stdout_line_matches("trying to open current database format")?; - zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + if expect_older_version { + zebrad.expect_stdout_line_matches("trying to open older database format")?; + zebrad.expect_stdout_line_matches("marking database format as upgraded")?; + zebrad.expect_stdout_line_matches("database is fully upgraded")?; + } else { + zebrad.expect_stdout_line_matches("trying to open current database format")?; + zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + } let logs = zebrad.kill_and_return_output(false)?; - assert!( - !logs.contains("marking database format as upgraded"), - "unexpected format upgrade in logs:\n\ - {logs}" - ); + if !expect_older_version { + assert!( + !logs.contains("marking database format as upgraded"), + "unexpected format upgrade in logs:\n\ + {logs}" + ); + } + assert!( !logs.contains("marking database format as downgraded"), "unexpected format downgrade in logs:\n\ From c3abfc02be863663ae21e0e26f35d0a6ccc8d929 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:06:58 +1000 Subject: [PATCH 30/43] Test that version downgrades work as expected (best effort) --- zebrad/tests/acceptance.rs | 56 +++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 4ad2a0ea6d6..fe2ef37e781 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2432,6 +2432,29 @@ async fn update_state_format() -> Result<()> { Ok(()) } +/// Check that newer state formats are downgraded to the current state format version, +/// and that restarting `zebrad` doesn't change the format version. +/// +/// Future version compatibility is a best-effort attempt, this test can be disabled if it fails. +#[tokio::test] +async fn downgrade_state_format() -> Result<()> { + let mut fake_version = database_format_version_in_code(); + fake_version.minor = u16::MAX.into(); + fake_version.patch = 0; + + for network in [Mainnet, Testnet] { + state_format_test( + "downgrade_state_format_test", + network, + 3, + Some(&fake_version), + ) + .await?; + } + + Ok(()) +} + /// Test state format changes, see calling tests for details. async fn state_format_test( base_test_name: &str, @@ -2483,9 +2506,11 @@ async fn state_format_test( .wrap_err("Possible port conflict. Are there other acceptance tests running?")?; // # Apply the fake version if needed + let mut expect_older_version = false; + let mut expect_newer_version = false; if let Some(fake_version) = fake_version { - let test_name = &format!("{base_test_name}/apply_fake_version"); + let test_name = &format!("{base_test_name}/apply_fake_version/{fake_version}"); tracing::info!(?network, "running {test_name} using zebra-state"); let mut config = UseAnyState @@ -2495,14 +2520,26 @@ async fn state_format_test( zebra_state::write_database_format_version_to_disk(fake_version, &config.state, network) .expect("can't write fake database version to disk"); + + let running_version = database_format_version_in_code(); + + if fake_version < &running_version { + expect_older_version = true; + } else if fake_version > &running_version { + expect_newer_version = true; + } } // # Reopen that state and check the version hasn't changed for reopened in 0..reopen_count { - let expect_older_version = fake_version.is_some() && reopened == 0; let test_name = &format!("{base_test_name}/reopen/{reopened}"); + if reopened > 0 { + expect_older_version = false; + expect_newer_version = false; + } + let mut zebrad = spawn_zebrad_without_rpc(network, test_name, false, false, dir, false)? .expect("unexpectedly missing required state or env vars"); @@ -2512,6 +2549,9 @@ async fn state_format_test( zebrad.expect_stdout_line_matches("trying to open older database format")?; zebrad.expect_stdout_line_matches("marking database format as upgraded")?; zebrad.expect_stdout_line_matches("database is fully upgraded")?; + } else if expect_newer_version { + zebrad.expect_stdout_line_matches("trying to open newer database format")?; + zebrad.expect_stdout_line_matches("marking database format as downgraded")?; } else { zebrad.expect_stdout_line_matches("trying to open current database format")?; zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; @@ -2527,11 +2567,13 @@ async fn state_format_test( ); } - assert!( - !logs.contains("marking database format as downgraded"), - "unexpected format downgrade in logs:\n\ - {logs}" - ); + if !expect_newer_version { + assert!( + !logs.contains("marking database format as downgraded"), + "unexpected format downgrade in logs:\n\ + {logs}" + ); + } let output = zebrad.wait_with_output()?; let mut output = output.assert_failure()?; From c189e6fe4448b6cb217e98a6f22ba4c7e3bed10d Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:09:11 +1000 Subject: [PATCH 31/43] Add a TODO for testing partial updates --- zebrad/tests/acceptance.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index fe2ef37e781..f942d765e9c 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2419,6 +2419,9 @@ async fn new_state_format() -> Result<()> { /// Check that outdated states are updated to the current state format version, /// and that restarting `zebrad` doesn't change the updated format version. +/// +/// TODO: test partial updates, once we have some updates that take a while. +/// (or just add a delay during tests) #[tokio::test] async fn update_state_format() -> Result<()> { let mut fake_version = database_format_version_in_code(); From ae806ba21d9cbf47ece2a1e2f32d4a4d026b0f8d Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:15:06 +1000 Subject: [PATCH 32/43] Fix missing test arguments --- .../tests/common/get_block_template_rpcs/get_block_template.rs | 2 +- zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs | 2 +- zebrad/tests/common/get_block_template_rpcs/submit_block.rs | 2 +- zebrad/tests/common/lightwalletd/send_transaction_test.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs b/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs index 689c51ce921..81a8ad4946c 100644 --- a/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs +++ b/zebrad/tests/common/get_block_template_rpcs/get_block_template.rs @@ -66,7 +66,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, true) { return Ok(()); } diff --git a/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs b/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs index 7d0518c00db..5dd0fd81604 100644 --- a/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs +++ b/zebrad/tests/common/get_block_template_rpcs/get_peer_info.rs @@ -21,7 +21,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, true) { return Ok(()); } diff --git a/zebrad/tests/common/get_block_template_rpcs/submit_block.rs b/zebrad/tests/common/get_block_template_rpcs/submit_block.rs index adea5fe5a7c..8e606606389 100644 --- a/zebrad/tests/common/get_block_template_rpcs/submit_block.rs +++ b/zebrad/tests/common/get_block_template_rpcs/submit_block.rs @@ -31,7 +31,7 @@ pub(crate) async fn run() -> Result<()> { let network = Network::Mainnet; // Skip the test unless the user specifically asked for it and there is a zebrad_state_path - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, true) { return Ok(()); } diff --git a/zebrad/tests/common/lightwalletd/send_transaction_test.rs b/zebrad/tests/common/lightwalletd/send_transaction_test.rs index 1ad46c33892..72b99d57f94 100644 --- a/zebrad/tests/common/lightwalletd/send_transaction_test.rs +++ b/zebrad/tests/common/lightwalletd/send_transaction_test.rs @@ -62,7 +62,7 @@ pub async fn run() -> Result<()> { let network = Mainnet; // Skip the test unless the user specifically asked for it - if !can_spawn_zebrad_for_test_type(test_name, test_type) { + if !can_spawn_zebrad_for_test_type(test_name, test_type, true) { return Ok(()); } From 196f2880dcb84df0e469a1234d2f715a9c585c0b Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:18:38 +1000 Subject: [PATCH 33/43] clippy if chain --- zebrad/tests/acceptance.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index f942d765e9c..ac63340e739 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -136,6 +136,7 @@ //! ``` use std::{ + cmp::Ordering, collections::HashSet, env, fs, panic, path::PathBuf, @@ -2526,10 +2527,10 @@ async fn state_format_test( let running_version = database_format_version_in_code(); - if fake_version < &running_version { - expect_older_version = true; - } else if fake_version > &running_version { - expect_newer_version = true; + match fake_version.cmp(&running_version) { + Ordering::Less => expect_older_version = true, + Ordering::Equal => {} + Ordering::Greater => expect_newer_version = true, } } From 74ce5bfd9a75517e51d441f33e0652b2afccd3a2 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:22:21 +1000 Subject: [PATCH 34/43] Fix typo --- zebra-test/src/command.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-test/src/command.rs b/zebra-test/src/command.rs index f39ebf47129..75f45fc6d65 100644 --- a/zebra-test/src/command.rs +++ b/zebra-test/src/command.rs @@ -1373,7 +1373,7 @@ impl TestOutput { self.output.status.signal() == Some(9) } - /// Takes the generic `dir` paramter out of this `TestOutput`. + /// Takes the generic `dir` parameter out of this `TestOutput`. pub fn take_dir(&mut self) -> Option { self.dir.take() } From 08444e8cd28d1d44a3767b41bb517c80441f3f09 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 7 Jul 2023 13:33:13 +1000 Subject: [PATCH 35/43] another typo --- zebra-state/src/service/finalized_state/disk_format/upgrade.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 1b16d02e327..9717286436a 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -187,7 +187,7 @@ impl DbFormatChange { // Older supported versions just assume they can read newer formats, // because they can't predict all changes a newer Zebra version could make. // - // The resposibility of staying backwards-compatible is on the newer version. + // The responsibility of staying backwards-compatible is on the newer version. // We do this on a best-effort basis for versions that are still supported. } } From f7ed3624010398d6db69e2caa0b75742a7db792d Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 06:08:49 +1000 Subject: [PATCH 36/43] Pass a database instance to the format upgrade task --- .../finalized_state/disk_format/upgrade.rs | 35 +++++++++++++++---- .../src/service/finalized_state/zebra_db.rs | 20 +++++++++-- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 9717286436a..592323a4189 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -16,7 +16,7 @@ use DbFormatChange::*; use crate::{ config::write_database_format_version_to_disk, database_format_version_in_code, - database_format_version_on_disk, Config, + database_format_version_on_disk, service::finalized_state::ZebraDb, Config, }; /// The kind of database format change we're performing. @@ -127,11 +127,15 @@ impl DbFormatChange { } /// Launch a `std::thread` that applies this format change to the database. + /// + /// `initial_tip_height` is the database height when it was opened, and `upgrade_db` is the + /// database instance to upgrade. pub fn spawn_format_change( self, config: Config, network: Network, initial_tip_height: Option, + upgrade_db: ZebraDb, ) -> DbFormatChangeThreadHandle { // # Correctness // @@ -142,7 +146,13 @@ impl DbFormatChange { let span = Span::current(); let update_task = thread::spawn(move || { span.in_scope(move || { - self.apply_format_change(config, network, initial_tip_height, cancel_receiver); + self.apply_format_change( + config, + network, + initial_tip_height, + upgrade_db, + cancel_receiver, + ); }) }); @@ -157,7 +167,9 @@ impl DbFormatChange { } /// Apply this format change to the database. - /// Format changes should be launched in an independent `std::thread`. + /// + /// Format changes should be launched in an independent `std::thread`, which runs until the + /// upgrade is finished. /// /// See `apply_format_upgrade()` for details. fn apply_format_change( @@ -165,13 +177,18 @@ impl DbFormatChange { config: Config, network: Network, initial_tip_height: Option, + upgrade_db: ZebraDb, cancel_receiver: mpsc::Receiver, ) { match self { // Handled in the rest of this function. - Upgrade { .. } => { - self.apply_format_upgrade(config, network, initial_tip_height, cancel_receiver) - } + Upgrade { .. } => self.apply_format_upgrade( + config, + network, + initial_tip_height, + upgrade_db, + cancel_receiver, + ), NewlyCreated { .. } => { Self::mark_as_newly_created(&config, network); @@ -205,6 +222,7 @@ impl DbFormatChange { config: Config, network: Network, initial_tip_height: Option, + upgrade_db: ZebraDb, cancel_receiver: mpsc::Receiver, ) { let Upgrade { @@ -260,6 +278,9 @@ impl DbFormatChange { && matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { // TODO: Do one format upgrade step here + // + // This fake step just shows how to access the database. + let _replace_me_ = upgrade_db.tip(); upgrade_height = (upgrade_height + 1).expect("task exits before maximum height"); } @@ -276,6 +297,8 @@ impl DbFormatChange { ); Self::mark_as_upgraded_to(&database_format_add_format_change_task, &config, network); + // End of example format change. + // # New Upgrades Usually Go Here // // New code goes above this comment! diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 542a7e27653..63decfe10db 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -76,16 +76,32 @@ impl ZebraDb { db.check_max_on_disk_tip_height(); + // We have to get this height before we spawn the upgrade task, because threads can take + // a while to start, and new blocks can be committed as soon as we return from this method. let initial_tip_height = db.finalized_tip_height(); // Start any required format changes. + // + // TODO: should debug_stop_at_height wait for these upgrades, or not? if let Some(format_change) = format_change { // Launch the format change and install its handle in the database. - db.format_change_handle = Some(format_change.spawn_format_change( + // + // `upgrade_db` is a special clone of the database, which can't be used to shut down + // the upgrade task. (Because the task hasn't been launched yet, + // `db.format_change_handle` is always None.) + // + // It can be a FinalizedState if needed, or the FinalizedState methods needed for + // upgrades can be moved to ZebraDb. + let upgrade_db = db.clone(); + + let format_change_handle = format_change.spawn_format_change( config.clone(), network, initial_tip_height, - )); + upgrade_db, + ); + + db.format_change_handle = Some(format_change_handle); } db From adbb2020bfaea241080101b26e80355688f09add Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 06:18:15 +1000 Subject: [PATCH 37/43] Fix a timing issue in the tests --- .../finalized_state/disk_format/upgrade.rs | 27 +++++++++---------- zebrad/tests/acceptance.rs | 12 ++++----- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 592323a4189..8527c8f4c76 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -247,10 +247,9 @@ impl DbFormatChange { Self::mark_as_upgraded_to(&database_format_version_in_code(), &config, network); info!( - ?initial_tip_height, ?newer_running_version, ?older_disk_version, - "database is fully upgraded" + "empty database is fully upgraded" ); return; @@ -339,14 +338,14 @@ impl DbFormatChange { running: {running_version}" ); + write_database_format_version_to_disk(&running_version, config, network) + .expect("unable to write database format version file to disk"); + info!( ?running_version, ?disk_version, - "marking database format as newly created" + "marked database format as newly created" ); - - write_database_format_version_to_disk(&running_version, config, network) - .expect("unable to write database format version file to disk"); } /// Mark the database as upgraded to `format_upgrade_version`. @@ -400,15 +399,15 @@ impl DbFormatChange { running: {running_version}" ); + write_database_format_version_to_disk(format_upgrade_version, config, network) + .expect("unable to write database format version file to disk"); + info!( ?running_version, ?format_upgrade_version, ?disk_version, - "marking database format as upgraded" + "marked database format as upgraded" ); - - write_database_format_version_to_disk(format_upgrade_version, config, network) - .expect("unable to write database format version file to disk"); } /// Mark the database as downgraded to the running database version. @@ -439,14 +438,14 @@ impl DbFormatChange { running: {running_version}" ); + write_database_format_version_to_disk(&running_version, config, network) + .expect("unable to write database format version file to disk"); + info!( ?running_version, ?disk_version, - "marking database format as downgraded" + "marked database format as downgraded" ); - - write_database_format_version_to_disk(&running_version, config, network) - .expect("unable to write database format version file to disk"); } } diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index ac63340e739..aa734ce180f 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2487,12 +2487,12 @@ async fn state_format_test( let logs = zebrad.kill_and_return_output(false)?; assert!( - !logs.contains("marking database format as upgraded"), + !logs.contains("marked database format as upgraded"), "unexpected format upgrade in logs:\n\ {logs}" ); assert!( - !logs.contains("marking database format as downgraded"), + !logs.contains("marked database format as downgraded"), "unexpected format downgrade in logs:\n\ {logs}" ); @@ -2551,11 +2551,11 @@ async fn state_format_test( if expect_older_version { zebrad.expect_stdout_line_matches("trying to open older database format")?; - zebrad.expect_stdout_line_matches("marking database format as upgraded")?; + zebrad.expect_stdout_line_matches("marked database format as upgraded")?; zebrad.expect_stdout_line_matches("database is fully upgraded")?; } else if expect_newer_version { zebrad.expect_stdout_line_matches("trying to open newer database format")?; - zebrad.expect_stdout_line_matches("marking database format as downgraded")?; + zebrad.expect_stdout_line_matches("marked database format as downgraded")?; } else { zebrad.expect_stdout_line_matches("trying to open current database format")?; zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; @@ -2565,7 +2565,7 @@ async fn state_format_test( if !expect_older_version { assert!( - !logs.contains("marking database format as upgraded"), + !logs.contains("marked database format as upgraded"), "unexpected format upgrade in logs:\n\ {logs}" ); @@ -2573,7 +2573,7 @@ async fn state_format_test( if !expect_newer_version { assert!( - !logs.contains("marking database format as downgraded"), + !logs.contains("marked database format as downgraded"), "unexpected format downgrade in logs:\n\ {logs}" ); From 20e642e9217224f4dc1239266c5355809af72272 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 06:28:19 +1000 Subject: [PATCH 38/43] Fix version matching in CI --- .github/workflows/deploy-gcp-tests.yml | 6 +++--- zebrad/src/application.rs | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 451b295d2c8..24c46bb3662 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1754,8 +1754,8 @@ jobs: # either a semantic version or "creating new database" INITIAL_DISK_DB_VERSION=$( \ echo "$DOCKER_LOGS" | \ - grep --extended-regexp --only-matching 'initial disk state version: [0-9a-z\. ]+' | \ - grep --extended-regexp --only-matching '[0-9a-z\. ]+' | \ + grep --extended-regexp --only-matching 'initial disk state version: [0-9a-z\.]+' | \ + grep --extended-regexp --only-matching '[0-9a-z\.]+' | \ tail -1 || \ [[ $? == 1 ]] \ ) @@ -1771,7 +1771,7 @@ jobs: false fi - if [[ "$INITIAL_DISK_DB_VERSION" = "creating new database" ]]; then + if [[ "$INITIAL_DISK_DB_VERSION" = "creating.new.database" ]]; then INITIAL_DISK_DB_VERSION="new" else INITIAL_DISK_DB_VERSION="v${INITIAL_DISK_DB_VERSION//./-}" diff --git a/zebrad/src/application.rs b/zebrad/src/application.rs index 161719dbff7..133465ffa58 100644 --- a/zebrad/src/application.rs +++ b/zebrad/src/application.rs @@ -266,7 +266,8 @@ impl Application for ZebradApp { let disk_db_version = match database_format_version_on_disk(&config.state, config.network.network) { Ok(Some(version)) => version.to_string(), - Ok(None) => "creating new database".to_string(), + // This "version" is specially formatted to match a relaxed version regex in CI + Ok(None) => "creating.new.database".to_string(), Err(error) => { let mut error = format!("error: {error:?}"); error.truncate(100); From 3455327b771070cfe8832ca3bc70a32bd83bff46 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 06:30:11 +1000 Subject: [PATCH 39/43] Use correct env var reference --- .github/workflows/deploy-gcp-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 24c46bb3662..68028c7bb78 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1920,7 +1920,7 @@ jobs: --source-disk-zone=${{ vars.GCP_ZONE }} \ --storage-location=us \ --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }} and database format ${{ env.DB_VERSION_SUMMARY }}" \ - --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${{ env.RUNNING_DB_VERSION }},initial-state-disk-version=${{ env.INITIAL_DISK_DB_VERSION }},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},force-save=${{ inputs.force_save_to_disk }},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" + --labels="height=${{ env.SYNC_HEIGHT }},purpose=${{ inputs.disk_prefix }},commit=${{ env.GITHUB_SHA_SHORT }},state-version=${{ env.STATE_VERSION }},state-running-version=${RUNNING_DB_VERSION},initial-state-disk-version=${INITIAL_DISK_DB_VERSION},network=${NETWORK},target-height-kind=${{ inputs.disk_suffix }},update-flag=${UPDATE_SUFFIX},force-save=${{ inputs.force_save_to_disk }},updated-from-height=${ORIGINAL_HEIGHT},test-id=${{ inputs.test_id }},app-name=${{ inputs.app_name }}" else echo "Skipped cached state update because the new sync height $SYNC_HEIGHT was less than $CACHED_STATE_UPDATE_LIMIT blocks above the original height $ORIGINAL_HEIGHT" fi From 4c3fa5c2ab105e61e0b00886d568185a83d876de Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 06:32:55 +1000 Subject: [PATCH 40/43] Use correct github env file --- .github/workflows/deploy-gcp-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 68028c7bb78..03b23be75da 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1778,7 +1778,7 @@ jobs: fi echo "Found initial disk database version in logs: $INITIAL_DISK_DB_VERSION" - echo "INITIAL_DISK_DB_VERSION=$INITIAL_DISK_DB_VERSION" >> "$INITIAL_DISK_DB_VERSION" + echo "INITIAL_DISK_DB_VERSION=$INITIAL_DISK_DB_VERSION" >> "$GITHUB_ENV" RUNNING_DB_VERSION=$( \ echo "$DOCKER_LOGS" | \ @@ -1801,7 +1801,7 @@ jobs: RUNNING_DB_VERSION="v${RUNNING_DB_VERSION//./-}" echo "Found running database version in logs: $RUNNING_DB_VERSION" - echo "RUNNING_DB_VERSION=$RUNNING_DB_VERSION" >> "$RUNNING_DB_VERSION" + echo "RUNNING_DB_VERSION=$RUNNING_DB_VERSION" >> "$GITHUB_ENV" if [[ "$INITIAL_DISK_DB_VERSION" = "$RUNNING_DB_VERSION" ]]; then DB_VERSION_SUMMARY="$RUNNING_DB_VERSION" @@ -1812,7 +1812,7 @@ jobs: fi echo "Summarised database versions from logs: $DB_VERSION_SUMMARY" - echo "DB_VERSION_SUMMARY=$DB_VERSION_SUMMARY" >> "$DB_VERSION_SUMMARY" + echo "DB_VERSION_SUMMARY=$DB_VERSION_SUMMARY" >> "$GITHUB_ENV" # Get the sync height from the test logs, which is later used as part of the # disk description and labels. From 9c12699e6525ee1cda61b17f716db49daacfa01c Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 08:16:36 +1000 Subject: [PATCH 41/43] Wait for the database to be written before killing Zebra --- zebrad/tests/acceptance.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index aa734ce180f..a35c05f5d44 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -2484,6 +2484,9 @@ async fn state_format_test( zebrad.expect_stdout_line_matches("creating new database with the current format")?; zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; + // Give Zebra enough time to actually write the database to disk. + tokio::time::sleep(Duration::from_secs(1)).await; + let logs = zebrad.kill_and_return_output(false)?; assert!( @@ -2525,6 +2528,9 @@ async fn state_format_test( zebra_state::write_database_format_version_to_disk(fake_version, &config.state, network) .expect("can't write fake database version to disk"); + // Give zebra_state enough time to actually write the database version to disk. + tokio::time::sleep(Duration::from_secs(1)).await; + let running_version = database_format_version_in_code(); match fake_version.cmp(&running_version) { @@ -2561,6 +2567,9 @@ async fn state_format_test( zebrad.expect_stdout_line_matches("loaded Zebra state cache")?; } + // Give Zebra enough time to actually write the database to disk. + tokio::time::sleep(Duration::from_secs(1)).await; + let logs = zebrad.kill_and_return_output(false)?; if !expect_older_version { From 7b9a0aed04efaf61d35988283fea5d2c841da0f3 Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 10 Jul 2023 16:57:03 +1000 Subject: [PATCH 42/43] Use correct workflow syntax --- .github/workflows/continous-integration-docker.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/continous-integration-docker.yml b/.github/workflows/continous-integration-docker.yml index 5454acba9a5..d761a235fab 100644 --- a/.github/workflows/continous-integration-docker.yml +++ b/.github/workflows/continous-integration-docker.yml @@ -335,7 +335,7 @@ jobs: test_variables: '-e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1' needs_zebra_state: false saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_suffix: checkpoint height_grep_text: 'flushing database to disk .*height.*=.*Height.*\(' secrets: inherit @@ -397,7 +397,7 @@ jobs: is_long_test: true needs_zebra_state: false saves_to_disk: true - force_save_to_disk: ${{ github.event.inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -437,7 +437,7 @@ jobs: needs_zebra_state: true # update the disk on every PR, to increase CI speed saves_to_disk: true - force_save_to_disk: ${{ inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -510,7 +510,7 @@ jobs: is_long_test: true needs_zebra_state: false saves_to_disk: true - force_save_to_disk: ${{ inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -554,7 +554,7 @@ jobs: # update the disk on every PR, to increase CI speed # we don't have a test-update-sync-testnet job, so we need to update the disk here saves_to_disk: true - force_save_to_disk: ${{ inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_suffix: tip root_state_path: '/var/cache' zebra_state_dir: 'zebrad-cache' @@ -587,7 +587,7 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: true - force_save_to_disk: ${{ inputs.force_save_to_disk }} + force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_prefix: lwd-cache disk_suffix: tip root_state_path: '/var/cache' From 99359eb2272a06ca1ceee53e1d6e40d053fdfc54 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 13 Jul 2023 07:39:08 +1000 Subject: [PATCH 43/43] Version changes aren't always upgrades --- .github/workflows/deploy-gcp-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 03b23be75da..8b5318aab94 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -1808,7 +1808,7 @@ jobs: elif [[ "$INITIAL_DISK_DB_VERSION" = "new" ]]; then DB_VERSION_SUMMARY="$RUNNING_DB_VERSION in new database" else - DB_VERSION_SUMMARY="$INITIAL_DISK_DB_VERSION upgrading to $RUNNING_DB_VERSION" + DB_VERSION_SUMMARY="$INITIAL_DISK_DB_VERSION changing to $RUNNING_DB_VERSION" fi echo "Summarised database versions from logs: $DB_VERSION_SUMMARY"