From 93af30bd010e9fc01827ab0d5107bdf45e1704de Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Fri, 22 Nov 2024 10:29:57 -0500 Subject: [PATCH 01/21] The Grand Refactoring of DsState --- upstairs/src/client.rs | 902 ++++++------------------- upstairs/src/downstairs.rs | 350 +++++----- upstairs/src/dummy_downstairs_tests.rs | 102 ++- upstairs/src/lib.rs | 119 ++-- upstairs/src/live_repair.rs | 24 +- upstairs/src/upstairs.rs | 185 +++-- 6 files changed, 699 insertions(+), 983 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index 4b2f9b3a0..dbdcde210 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -2,8 +2,8 @@ use crate::{ cdt, integrity_hash, io_limits::ClientIOLimits, live_repair::ExtentInfo, upstairs::UpstairsConfig, upstairs::UpstairsState, ClientIOStateCount, - ClientId, CrucibleDecoder, CrucibleError, DownstairsIO, DsState, - EncryptionContext, IOState, IOop, JobId, Message, RawReadResponse, + ClientId, ConnectionMode, CrucibleDecoder, CrucibleError, DownstairsIO, + DsState, EncryptionContext, IOState, IOop, JobId, Message, RawReadResponse, ReconcileIO, ReconcileIOState, RegionDefinitionStatus, RegionMetadata, }; use crucible_common::{x509::TLSContext, ExtentId, VerboseTimeout}; @@ -181,12 +181,6 @@ pub(crate) struct DownstairsClient { /// Accumulated statistics pub(crate) stats: DownstairsStats, - /// State for the "promote to active" action - promote_state: Option, - - /// State for startup negotiation - negotiation_state: NegotiationState, - /// Session ID for a clients connection to a downstairs. 
connection_id: ConnectionId, @@ -219,13 +213,16 @@ impl DownstairsClient { io_limits, region_uuid: None, needs_replay: false, - negotiation_state: NegotiationState::Start, tls_context, - promote_state: None, log, target_addr, repair_addr: None, - state: DsState::New, + state: DsState::Connecting { + mode: ConnectionMode::New, + state: NegotiationState::Start { + auto_promote: false, + }, + }, last_flush: JobId(0), stats: DownstairsStats::default(), skipped_jobs: BTreeSet::new(), @@ -262,13 +259,16 @@ impl DownstairsClient { ), region_uuid: None, needs_replay: false, - negotiation_state: NegotiationState::Start, tls_context: None, - promote_state: None, log: crucible_common::build_logger(), target_addr: None, repair_addr: None, - state: DsState::New, + state: DsState::Connecting { + mode: ConnectionMode::New, + state: NegotiationState::Start { + auto_promote: false, + }, + }, last_flush: JobId(0), stats: DownstairsStats::default(), skipped_jobs: BTreeSet::new(), @@ -326,6 +326,7 @@ impl DownstairsClient { warn!(self.log, "failed to send stop request") } self.checked_state_transition(up_state, DsState::Stopping(r)); + debug!(self.log, "NEW IO STATE {:?}", self.state); } else { warn!(self.log, "client task is already stopping") } @@ -481,11 +482,15 @@ impl DownstairsClient { /// Sets our state to `DsState::Reconcile` /// /// # Panics - /// If the previous state is not `DsState::WaitQuorum` + /// If the current state is invalid pub(crate) fn begin_reconcile(&mut self) { - info!(self.log, "Transition from {} to Reconcile", self.state); - assert_eq!(self.state, DsState::WaitQuorum); - self.state = DsState::Reconcile; + info!(self.log, "Transition from {:?} to Reconcile", self.state); + let DsState::Connecting { state, mode } = &mut self.state else { + panic!("invalid state {:?}", self.state); + }; + assert_eq!(*state, NegotiationState::WaitQuorum); + assert_eq!(*mode, ConnectionMode::New); // XXX Replaced? 
+ *state = NegotiationState::Reconcile; } /// Go through the list of dependencies and remove any jobs that this @@ -533,7 +538,10 @@ impl DownstairsClient { /// If the downstairs is offline pub(crate) fn ready_to_deactivate(&self) -> bool { match &self.state { - DsState::New | DsState::WaitActive => { + DsState::Connecting { + mode: ConnectionMode::New, + state: NegotiationState::Start { .. }, + } => { info!( self.log, "ready to deactivate from state {:?}", self.state @@ -571,49 +579,44 @@ impl DownstairsClient { // If the upstairs is already active (or trying to go active), then the // downstairs should automatically call PromoteToActive when it reaches // the relevant state. - self.promote_state = match up_state { - UpstairsState::Active | UpstairsState::GoActive(..) => { - Some(PromoteState::Waiting) - } + let auto_promote = match up_state { + UpstairsState::Active | UpstairsState::GoActive(..) => true, UpstairsState::Initializing - | UpstairsState::Deactivating { .. } => None, + | UpstairsState::Deactivating { .. } => false, }; - self.negotiation_state = NegotiationState::Start; - let current = &self.state; - let new_state = match current { - DsState::Active | DsState::Offline if !can_replay => { - Some(DsState::Faulted) - } + let new_mode = match current { + DsState::Active + | DsState::Connecting { + mode: ConnectionMode::Offline, + .. 
+ } if !can_replay => Some(ConnectionMode::Faulted), DsState::LiveRepair - | DsState::LiveRepairReady | DsState::Stopping(ClientStopReason::Fault(..)) => { - Some(DsState::Faulted) + Some(ConnectionMode::Faulted) } - DsState::Active => Some(DsState::Offline), + DsState::Active => Some(ConnectionMode::Offline), - DsState::Reconcile - | DsState::WaitQuorum - | DsState::WaitActive - | DsState::Stopping(ClientStopReason::NegotiationFailed(..)) + DsState::Stopping(ClientStopReason::NegotiationFailed(..)) | DsState::Stopping(ClientStopReason::Disabled) | DsState::Stopping(ClientStopReason::Deactivated) => { - Some(DsState::New) + Some(ConnectionMode::New) } // If we have replaced a downstairs, don't forget that. DsState::Stopping(ClientStopReason::Replacing) => { - Some(DsState::Replaced) + Some(ConnectionMode::Replaced) } // We stay in these states through the task restart - DsState::Offline - | DsState::Faulted - | DsState::New - | DsState::Replaced => None, + DsState::Connecting { .. } => None, }; + let new_state = new_mode.map(|mode| DsState::Connecting { + mode, + state: NegotiationState::Start { auto_promote }, + }); // Jobs are skipped and replayed in `Downstairs::reinitialize`, which is // (probably) the caller of this function. @@ -624,7 +627,7 @@ impl DownstairsClient { self.connection_id.update(); // Restart with a short delay, connecting if we're auto-promoting - self.start_task(true, self.promote_state.is_some()); + self.start_task(true, auto_promote); } /// Sets the `needs_replay` flag @@ -796,58 +799,45 @@ impl DownstairsClient { ); } } - match self.promote_state { - Some(PromoteState::Waiting) => { - panic!("called set_active_request while already waiting") - } - Some(PromoteState::Sent) => { - panic!("called set_active_request after it was sent") - } - None => (), - } // If we're already in the point of negotiation where we're waiting to // go active, then immediately go active! 
- match self.state { - DsState::New => { + match &mut self.state { + DsState::Connecting { + state: NegotiationState::Start { auto_promote }, + mode: ConnectionMode::New | ConnectionMode::Replaced, + } => { + if *auto_promote { + panic!("called set_active_request while already waiting") + } + *auto_promote = true; info!( self.log, "client set_active_request while in {:?}; waiting...", self.state, ); - self.promote_state = Some(PromoteState::Waiting); } - DsState::Replaced => { - info!( - self.log, - "client set_active_request while Replaced; waiting..." - ); - self.promote_state = Some(PromoteState::Waiting); - } - DsState::WaitActive => { + DsState::Connecting { + state: NegotiationState::WaitActive, + mode: ConnectionMode::New | ConnectionMode::Replaced, + } => { info!( self.log, - "client set_active_request while in WaitActive \ - -> WaitForPromote" + "client set_active_request while in {:?} -> WaitForPromote", + self.state, ); - // If the client task has stopped, then print a warning but - // otherwise continue (because we'll be cleaned up by the - // JoinHandle watcher). self.send(Message::PromoteToActive { upstairs_id: self.cfg.upstairs_id, session_id: self.cfg.session_id, gen: self.cfg.generation(), }); - self.promote_state = Some(PromoteState::Sent); - // TODO: negotiation / promotion state is spread across - // DsState, PromoteState, and NegotiationState. We should - // consolidate into a single place - assert!( - self.negotiation_state == NegotiationState::Start - || self.negotiation_state - == NegotiationState::WaitForPromote - ); - self.negotiation_state = NegotiationState::WaitForPromote; + let DsState::Connecting { mode, .. 
} = self.state else { + unreachable!() + }; + self.state = DsState::Connecting { + state: NegotiationState::WaitForPromote, + mode, + }; } s => panic!("invalid state for set_active_request: {s:?}"), } @@ -920,9 +910,10 @@ impl DownstairsClient { ) -> EnqueueResult { match self.state { // We never send jobs if we're in certain inactive states - DsState::Faulted - | DsState::Replaced - | DsState::LiveRepairReady + DsState::Connecting { + mode: ConnectionMode::Faulted | ConnectionMode::Replaced, + .. + } | DsState::Stopping( ClientStopReason::Fault(..) | ClientStopReason::Disabled @@ -952,13 +943,16 @@ impl DownstairsClient { // cleared out by a subsequent flush (so we'll be able to bring that // client back into compliance by replaying jobs). DsState::Active => EnqueueResult::Send, - DsState::Offline => EnqueueResult::Hold, + DsState::Connecting { + mode: ConnectionMode::Offline, + .. + } => EnqueueResult::Hold, - DsState::New - | DsState::WaitActive - | DsState::WaitQuorum - | DsState::Reconcile - | DsState::Stopping(ClientStopReason::Deactivated) => panic!( + DsState::Stopping(ClientStopReason::Deactivated) + | DsState::Connecting { + mode: ConnectionMode::New, + .. + } => panic!( "enqueue should not be called from state {:?}", self.state ), @@ -1001,194 +995,11 @@ impl DownstairsClient { /// If the transition is not valid pub(crate) fn checked_state_transition( &mut self, - up_state: &UpstairsState, + _up_state: &UpstairsState, new_state: DsState, ) { - // TODO this should probably be private! 
- info!(self.log, "ds_transition from {} to {new_state}", self.state); - - let old_state = self.state; - - /* - * Check that this is a valid transition - */ - let panic_invalid = || { - panic!( - "[{}] {} Invalid transition: {:?} -> {:?}", - self.client_id, self.cfg.upstairs_id, old_state, new_state - ) - }; - match new_state { - DsState::Replaced => { - assert_eq!( - old_state, - DsState::Stopping(ClientStopReason::Replacing) - ); - } - DsState::WaitActive => { - if old_state == DsState::Offline { - if matches!(up_state, UpstairsState::Active) { - panic!( - "[{}] {} Bad up active state change {} -> {}", - self.client_id, - self.cfg.upstairs_id, - old_state, - new_state, - ); - } - } else if old_state != DsState::New - && old_state != DsState::Faulted - && old_state != DsState::Replaced - { - panic!( - "[{}] {} Negotiation failed, {:?} -> {:?}", - self.client_id, - self.cfg.upstairs_id, - old_state, - new_state, - ); - } - } - DsState::WaitQuorum => { - assert_eq!(old_state, DsState::WaitActive); - } - DsState::Faulted => { - match old_state { - DsState::Active - | DsState::Faulted - | DsState::Reconcile - | DsState::LiveRepair - | DsState::LiveRepairReady - | DsState::Offline - | DsState::Stopping(ClientStopReason::Fault(..)) => {} // Okay - _ => { - panic_invalid(); - } - } - } - DsState::Reconcile => { - assert!(!matches!(up_state, UpstairsState::Active)); - assert_eq!(old_state, DsState::WaitQuorum); - } - DsState::Active => { - match old_state { - DsState::WaitQuorum - | DsState::Reconcile - | DsState::LiveRepair - | DsState::Offline => {} // Okay - - DsState::LiveRepairReady if self.cfg.read_only => {} // Okay - - _ => { - panic_invalid(); - } - } - /* - * Make sure reconcile happened when the upstairs is inactive. 
- */ - if old_state == DsState::Reconcile { - assert!(!matches!(up_state, UpstairsState::Active)); - } - } - DsState::LiveRepair => { - assert_eq!(old_state, DsState::LiveRepairReady); - } - DsState::LiveRepairReady => { - match old_state { - DsState::Faulted | DsState::Replaced => {} // Okay - _ => { - panic_invalid(); - } - } - } - DsState::New => { - // Before new, we must have been in - // on of these states. - match old_state { - DsState::Active - | DsState::Faulted - | DsState::Reconcile - | DsState::Stopping( - ClientStopReason::Deactivated - | ClientStopReason::Disabled - | ClientStopReason::Replacing - | ClientStopReason::NegotiationFailed(..), - ) => {} // Okay - _ => { - panic_invalid(); - } - } - } - DsState::Offline => { - match old_state { - DsState::Active => {} // Okay - _ => { - panic_invalid(); - } - } - } - - // We only go deactivated if we were actually active, or - // somewhere past active. - // if deactivate is requested before active, the downstairs - // state should just go back to NEW and re-require an - // activation. - DsState::Stopping(ClientStopReason::Deactivated) => { - match old_state { - DsState::Active - | DsState::LiveRepair - | DsState::LiveRepairReady - | DsState::Offline - | DsState::Reconcile => {} // Okay - DsState::Faulted => { - if matches!(up_state, UpstairsState::Active) { - // Can't transition like this when active - panic_invalid(); - } - } - _ => { - panic_invalid(); - } - } - } - - // Some stop reasons may occur at any time - DsState::Stopping( - ClientStopReason::Fault(..) 
- | ClientStopReason::Replacing - | ClientStopReason::Disabled, - ) => {} - - // The client may undergo negotiation for many reasons - DsState::Stopping(ClientStopReason::NegotiationFailed(..)) => { - match old_state { - DsState::New - | DsState::WaitActive - | DsState::WaitQuorum - | DsState::Reconcile - | DsState::Offline - | DsState::Faulted - | DsState::LiveRepairReady => {} - _ => panic_invalid(), - } - } - } - - if old_state != new_state { - info!( - self.log, - "[{}] Transition from {} to {}", - self.client_id, - old_state, - new_state, - ); - self.state = new_state; - } else { - warn!( - self.log, - "[{}] transition to same state: {}", self.client_id, new_state - ); - } + // TODO reimplement all of the checks + self.state = new_state; } /// Sets `repair_info` to `None` and increments `live_repair_aborted` @@ -1380,22 +1191,28 @@ impl DownstairsClient { /// Skips from `LiveRepairReady` to `Active`; a no-op otherwise /// /// # Panics - /// If this downstairs is not read-only + /// If this downstairs is not read-only, or we weren't previously in + /// `NegotiationState::LiveRepairReady` pub(crate) fn skip_live_repair(&mut self, up_state: &UpstairsState) { - if self.state == DsState::LiveRepairReady { - assert!(self.cfg.read_only); - // TODO: could we do this transition early, by automatically - // skipping LiveRepairReady if read-only? - self.checked_state_transition(up_state, DsState::Active); - self.stats.ro_lr_skipped += 1; - } + let DsState::Connecting { state, .. } = self.state else { + panic!("invalid call to SkipLiveRepair"); + }; + assert_eq!(state, NegotiationState::LiveRepairReady); + assert!(self.cfg.read_only); + + // TODO: could we do this transition early, by automatically + // skipping LiveRepairReady if read-only? 
+ self.checked_state_transition(up_state, DsState::Active); + self.stats.ro_lr_skipped += 1; } - /// Moves from `LiveRepairReady` to `LiveRepair`; a no-op otherwise + /// Moves from `LiveRepairReady` to `LiveRepair`; panics otherwise pub(crate) fn start_live_repair(&mut self, up_state: &UpstairsState) { - if self.state == DsState::LiveRepairReady { - self.checked_state_transition(up_state, DsState::LiveRepair); - } + let DsState::Connecting { state, .. } = self.state else { + panic!("invalid call to SkipLiveRepair"); + }; + assert_eq!(state, NegotiationState::LiveRepairReady); + self.checked_state_transition(up_state, DsState::LiveRepair); } /// Continues the negotiation and initial reconciliation process @@ -1501,19 +1318,31 @@ impl DownstairsClient { * upstairs. We set the downstairs to DsState::Active and the while * loop is exited. */ + let DsState::Connecting { state, mode } = &mut self.state else { + error!( + self.log, + "tried to continue negotiation while not connecting" + ); + self.abort_negotiation( + up_state, + ClientNegotiationFailed::BadNegotiationOrder, + ); + return Ok(false); + }; + let mode = *mode; // mode is immutable here match m { Message::YesItsMe { version, repair_addr, } => { - if self.negotiation_state != NegotiationState::Start { + let NegotiationState::Start { auto_promote } = *state else { error!(self.log, "got version already"); self.abort_negotiation( up_state, ClientNegotiationFailed::BadNegotiationOrder, ); return Ok(false); - } + }; if version != CRUCIBLE_MESSAGE_VERSION { error!( self.log, @@ -1527,50 +1356,21 @@ impl DownstairsClient { ); return Ok(false); } - self.negotiation_state = NegotiationState::WaitForPromote; self.repair_addr = Some(repair_addr); - match self.promote_state { - Some(PromoteState::Waiting) => { - self.send(Message::PromoteToActive { - upstairs_id: self.cfg.upstairs_id, - session_id: self.cfg.session_id, - gen: self.cfg.generation(), - }); - self.promote_state = Some(PromoteState::Sent); - 
self.negotiation_state = - NegotiationState::WaitForPromote; - // TODO This is an unfortunate corner of the state - // machine, where we have to be in WaitActive despite - // _already_ having gone active. - // If we are Replaced and we have not yet gone active - // then it is valid for us to transition to WA. - if self.state == DsState::New - || (self.state == DsState::Replaced - && !matches!(up_state, UpstairsState::Active)) - { - self.checked_state_transition( - up_state, - DsState::WaitActive, - ); - } else { - warn!( - self.log, - "version negotiation from state {:?}", - self.state - ); - } - } - Some(PromoteState::Sent) => { - // We shouldn't be able to get here. - panic!("got YesItsMe with promote_state == Sent"); - } - None => { - // Nothing to do here, wait for set_active_request - self.checked_state_transition( - up_state, - DsState::WaitActive, - ); - } + if auto_promote { + *state = NegotiationState::WaitForPromote; + self.send(Message::PromoteToActive { + upstairs_id: self.cfg.upstairs_id, + session_id: self.cfg.session_id, + gen: self.cfg.generation(), + }); + info!( + self.log, + "version negotiation from state {:?}", self.state + ); + } else { + // Nothing to do here, wait for set_active_request + *state = NegotiationState::WaitActive; } } Message::VersionMismatch { version } => { @@ -1611,11 +1411,10 @@ impl DownstairsClient { session_id, gen, } => { - if self.negotiation_state != NegotiationState::WaitForPromote { + if *state != NegotiationState::WaitForPromote { error!( self.log, - "Received YouAreNowActive out of order! {:?}", - self.negotiation_state + "Received YouAreNowActive out of order! 
{state:?}", ); self.abort_negotiation( up_state, @@ -1677,12 +1476,11 @@ impl DownstairsClient { } } - self.negotiation_state = NegotiationState::WaitForRegionInfo; + *state = NegotiationState::WaitForRegionInfo; self.send(Message::RegionInfoPlease); } Message::RegionInfo { region_def } => { - if self.negotiation_state != NegotiationState::WaitForRegionInfo - { + if *state != NegotiationState::WaitForRegionInfo { error!(self.log, "Received RegionInfo out of order!"); self.abort_negotiation( up_state, @@ -1729,7 +1527,7 @@ impl DownstairsClient { if uuid != region_def.uuid() { // If we are replacing the downstairs, then a new UUID // is okay. - if self.state == DsState::Replaced { + if mode == ConnectionMode::Replaced { warn!( self.log, "Replace downstairs uuid:{} with {}", @@ -1809,8 +1607,8 @@ impl DownstairsClient { *ddef = RegionDefinitionStatus::Received(region_def); // Match on the current state of this downstairs - match self.state { - DsState::Offline => { + match mode { + ConnectionMode::Offline => { /* * If we are coming from state Offline, then it means * the downstairs has departed then came back in short @@ -1826,35 +1624,25 @@ impl DownstairsClient { self.log, "send last flush ID to this DS: {}", lf ); - self.negotiation_state = NegotiationState::GetLastFlush; + *state = NegotiationState::GetLastFlush; self.send(Message::LastFlush { last_flush_number: lf, }); } - DsState::WaitActive - | DsState::Faulted - | DsState::Replaced => { + ConnectionMode::New + | ConnectionMode::Faulted + | ConnectionMode::Replaced => { /* * Ask for the current version of all extents. 
*/ - self.negotiation_state = - NegotiationState::GetExtentVersions; + *state = NegotiationState::GetExtentVersions; self.send(Message::ExtentVersionsPlease); } - bad_state => { - panic!( - "[{}] join from invalid state {} {} {:?}", - self.client_id, - bad_state, - self.cfg.upstairs_id, - self.negotiation_state, - ); - } } } Message::LastFlushAck { last_flush_number } => { - if self.negotiation_state != NegotiationState::GetLastFlush { + if *state != NegotiationState::GetLastFlush { error!(self.log, "Received LastFlushAck out of order!"); self.abort_negotiation( up_state, @@ -1862,10 +1650,12 @@ impl DownstairsClient { ); return Ok(false); // TODO should we trigger set_inactive? } - match self.state { - DsState::Offline => (), - s => panic!("got LastFlushAck in bad state {s:?}"), - } + assert_eq!( + mode, + ConnectionMode::Offline, + "got LastFlushAck in bad state {:?}", + self.state + ); info!( self.log, "Replied this last flush ID: {last_flush_number}" @@ -1875,16 +1665,13 @@ impl DownstairsClient { // Immediately set the state to Active, since we've already // copied over the jobs. self.checked_state_transition(up_state, DsState::Active); - - self.negotiation_state = NegotiationState::Done; } Message::ExtentVersions { gen_numbers, flush_numbers, dirty_bits, } => { - if self.negotiation_state != NegotiationState::GetExtentVersions - { + if *state != NegotiationState::GetExtentVersions { error!(self.log, "Received ExtentVersions out of order!"); self.abort_negotiation( up_state, @@ -1892,20 +1679,19 @@ impl DownstairsClient { ); return Ok(false); // TODO should we trigger set_inactive? 
} - match self.state { - DsState::WaitActive => { - self.checked_state_transition( - up_state, - DsState::WaitQuorum, - ); + match mode { + ConnectionMode::New => { + *state = NegotiationState::WaitQuorum; } - DsState::Faulted | DsState::Replaced => { - self.checked_state_transition( - up_state, - DsState::LiveRepairReady, + ConnectionMode::Faulted | ConnectionMode::Replaced => { + *state = NegotiationState::LiveRepairReady; + } + ConnectionMode::Offline => { + panic!( + "got ExtentVersions from invalid state {:?}", + self.state ); } - s => panic!("downstairs in invalid state {s}"), } /* @@ -1922,11 +1708,18 @@ impl DownstairsClient { if let Some(old_rm) = self.region_metadata.replace(dsr) { warn!(self.log, "new RM replaced this: {:?}", old_rm); } - self.negotiation_state = NegotiationState::Done; } m => panic!("invalid message in continue_negotiation: {m:?}"), } - Ok(self.negotiation_state == NegotiationState::Done) + // TODO: reconsider this part? + Ok(matches!( + self.state, + DsState::Connecting { + state: NegotiationState::WaitQuorum // new + | NegotiationState::LiveRepairReady, // live-repair + .. 
+ } | DsState::Active // replay + )) } /// Sends the next reconciliation job to all clients @@ -1938,8 +1731,17 @@ impl DownstairsClient { job: &mut ReconcileIO, ) { // If someone has moved us out of reconcile, this is a logic error - if self.state != DsState::Reconcile { - panic!("[{}] should still be in reconcile", self.client_id); + if !matches!( + self.state, + DsState::Connecting { + state: NegotiationState::Reconcile, + mode: ConnectionMode::New + } + ) { + panic!( + "[{}] should still be in reconcile, not {:?}", + self.client_id, self.state + ); } let prev_state = job .state @@ -2048,24 +1850,34 @@ impl DownstairsClient { } } -/// How to handle "promote to active" requests -#[derive(Debug)] -enum PromoteState { - /// Send `PromoteToActive` when the state machine reaches `WaitForPromote` - Waiting, - /// We have already sent `PromoteToActive` - Sent, -} - /// Tracks client negotiation progress -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -enum NegotiationState { - Start, +#[derive( + Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "type", content = "value")] +pub enum NegotiationState { + /// Initial state + Start { + auto_promote: bool, + }, + + /// Waiting for activation by the guest + WaitActive, + + /// Waiting for the minimum number of downstairs to be present. + WaitQuorum, + WaitForPromote, WaitForRegionInfo, GetLastFlush, GetExtentVersions, - Done, + + /// Initial startup, downstairs are repairing from each other. 
+ Reconcile, + + /// Waiting for live-repair to begin + LiveRepairReady, } /// Result value from [`DownstairsClient::enqueue`] @@ -2875,49 +2687,6 @@ pub(crate) fn validate_unencrypted_read_response( mod test { use super::*; - #[test] - fn downstairs_transition_normal() { - // Verify the correct downstairs progression - // New -> WA -> WQ -> Active - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - } - - #[test] - fn downstairs_transition_deactivate_new() { - // Verify deactivate goes to new - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - // Upstairs goes active! 
- client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Active, - DsState::Stopping(ClientStopReason::Deactivated), - ); - client.checked_state_transition(&UpstairsState::Active, DsState::New); - } - #[test] #[should_panic] fn downstairs_transition_deactivate_not_new() { @@ -2929,280 +2698,17 @@ mod test { ); } - #[test] - #[should_panic] - fn downstairs_transition_deactivate_not_wa() { - // Verify no deactivate from wa - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Stopping(ClientStopReason::Deactivated), - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_deactivate_not_wq() { - // Verify no deactivate from wq - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Stopping(ClientStopReason::Deactivated), - ); - } - - #[test] - fn downstairs_transition_active_to_faulted() { - // Verify active upstairs can go to faulted - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Faulted, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_disconnect_no_active() { - // Verify no activation from disconnected - let mut client = DownstairsClient::test_default(); - 
client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Stopping(ClientStopReason::Deactivated), - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_same_wa() { - // Verify we can't go to the same state we are in - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_same_wq() { - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - } - #[test] #[should_panic] fn downstairs_transition_same_active() { let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_no_new_to_offline() { - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Offline, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Offline, - ); - } 
- - #[test] - #[should_panic] - fn downstairs_transition_same_offline() { - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Offline, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Offline, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_backwards() { - // Verify state can't go backwards - // New -> WA -> WQ -> WA - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - } - - #[test] - #[should_panic] - fn downstairs_bad_transition_wq() { - // Verify error when going straight to WQ - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_bad_offline() { - // Verify offline cannot go to WQ - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); client.checked_state_transition( &UpstairsState::Initializing, DsState::Active, ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Offline, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - } - - #[test] - #[should_panic] - fn 
downstairs_transition_bad_active() { - // Verify active can't go back to WQ - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - } - - #[test] - fn downstairs_transition_active_faulted() { - // Verify - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); client.checked_state_transition( &UpstairsState::Initializing, DsState::Active, ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Faulted, - ); } } diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index 5eabd7294..cbcda81ef 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -10,7 +10,7 @@ use crate::{ cdt, client::{ ClientAction, ClientFaultReason, ClientNegotiationFailed, - ClientStopReason, DownstairsClient, EnqueueResult, + ClientStopReason, DownstairsClient, EnqueueResult, NegotiationState, }, guest::GuestBlockRes, io_limits::{IOLimitGuard, IOLimits}, @@ -18,10 +18,10 @@ use crate::{ stats::DownstairsStatOuter, upstairs::{UpstairsConfig, UpstairsState}, AckStatus, ActiveJobs, AllocRingBuffer, BlockRes, Buffer, ClientData, - ClientIOStateCount, ClientId, ClientMap, CrucibleError, DownstairsIO, - DownstairsMend, DsState, ExtentFix, ExtentRepairIDs, IOState, IOStateCount, - IOop, ImpactedBlocks, JobId, Message, RawReadResponse, RawWrite, - ReconcileIO, ReconciliationId, RegionDefinition, ReplaceResult, + ClientIOStateCount, ClientId, ClientMap, ConnectionMode, CrucibleError, + 
DownstairsIO, DownstairsMend, DsState, ExtentFix, ExtentRepairIDs, IOState, + IOStateCount, IOop, ImpactedBlocks, JobId, Message, RawReadResponse, + RawWrite, ReconcileIO, ReconciliationId, RegionDefinition, ReplaceResult, SnapshotDetails, WorkSummary, }; use crucible_common::{ @@ -443,14 +443,10 @@ impl Downstairs { #[cfg(test)] pub fn force_active(&mut self) { for cid in ClientId::iter() { - for state in - [DsState::WaitActive, DsState::WaitQuorum, DsState::Active] - { - self.clients[cid].checked_state_transition( - &UpstairsState::Initializing, - state, - ); - } + self.clients[cid].checked_state_transition( + &UpstairsState::Initializing, + DsState::Active, + ); } } @@ -679,13 +675,27 @@ impl Downstairs { // Special-case: if a Downstairs goes away midway through initial // reconciliation, then we have to manually abort reconciliation. - if self.clients.iter().any(|c| c.state() == DsState::Reconcile) { + if self.clients.iter().any(|c| { + matches!( + c.state(), + DsState::Connecting { + state: NegotiationState::Reconcile, + .. + } + ) + }) { self.abort_reconciliation(up_state); } // If this client is coming back from being offline, then mark that its // jobs must be replayed when it completes negotiation. - if self.clients[client_id].state() == DsState::Offline { + if matches!( + self.clients[client_id].state(), + DsState::Connecting { + mode: ConnectionMode::Offline, + .. + } + ) { self.clients[client_id].needs_replay(); } } @@ -729,7 +739,13 @@ impl Downstairs { // setting faulted, we return false here and let the faulting framework // take care of clearing out the skipped jobs. This then allows the // requested deactivation to finish. - if self.clients[client_id].state() == DsState::Offline { + if matches!( + self.clients[client_id].state(), + DsState::Connecting { + mode: ConnectionMode::Offline, + .. 
+ } + ) { info!(self.log, "[{}] Offline client moved to Faulted", client_id); self.fault_client( client_id, @@ -849,7 +865,13 @@ impl Downstairs { assert!(self.reconcile.is_none()); for c in self.clients.iter() { - assert_eq!(c.state(), DsState::WaitQuorum); + assert_eq!( + c.state(), + DsState::Connecting { + state: NegotiationState::WaitQuorum, + mode: ConnectionMode::New + } + ); } } r @@ -1865,7 +1887,15 @@ impl Downstairs { // If any client have dropped out of repair-readiness (e.g. due to // failed reconciliation, timeouts, etc), then we have to kick // everything else back to the beginning. - if self.clients.iter().any(|c| c.state() != DsState::Reconcile) { + if self.clients.iter().any(|c| { + !matches!( + c.state(), + DsState::Connecting { + state: NegotiationState::Reconcile, + .. + } + ) + }) { // Something has changed, so abort this repair. // Mark any downstairs that have not changed as failed and disable // them so that they restart. @@ -1913,7 +1943,13 @@ impl Downstairs { // Mark any downstairs that have not changed as failed and disable // them so that they restart. for (i, c) in self.clients.iter_mut().enumerate() { - if c.state() == DsState::Reconcile { + if matches!( + c.state(), + DsState::Connecting { + state: NegotiationState::Reconcile, + .. + } + ) { // Restart the IO task. This will cause the Upstairs to // deactivate through a ClientAction::TaskStopped. c.abort_negotiation( @@ -1949,15 +1985,14 @@ impl Downstairs { /// /// # Panics /// If that isn't the case! 
- pub(crate) fn on_reconciliation_done(&mut self, from_state: DsState) { + pub(crate) fn on_reconciliation_done(&mut self, did_work: bool) { assert!(self.ds_active.is_empty()); - for (i, c) in self.clients.iter_mut().enumerate() { - assert_eq!(c.state(), from_state, "invalid state for client {i}"); + for c in self.clients.iter_mut() { c.set_active(); } - if from_state == DsState::Reconcile { + if did_work { // reconciliation completed let r = self.reconcile.take().unwrap(); assert!(r.task_list.is_empty()); @@ -1968,11 +2003,9 @@ impl Downstairs { &r, false, /* aborted */ ); } - } else if from_state == DsState::WaitQuorum { + } else { // no reconciliation was required assert!(self.reconcile.is_none()); - } else { - panic!("unexpected from_state {from_state}"); } } @@ -2557,8 +2590,10 @@ impl Downstairs { // as that info is gone to us now, so assume it was true. match self.clients[new_client_id].state() { DsState::Stopping(ClientStopReason::Replacing) - | DsState::Replaced - | DsState::LiveRepairReady + | DsState::Connecting { + mode: ConnectionMode::Replaced, + .. + } | DsState::LiveRepair => { // These states indicate a replacement is in progress. return Ok(ReplaceResult::StartedAlready); @@ -2586,11 +2621,8 @@ impl Downstairs { continue; } match self.clients[client_id].state() { - // XXX there are a bunch of states that aren't ready for IO but - // aren't listed here, e.g. all of the negotiation states. DsState::Stopping(..) - | DsState::Replaced - | DsState::LiveRepairReady + | DsState::Connecting { .. } | DsState::LiveRepair => { return Err(CrucibleError::ReplaceRequestInvalid(format!( "Replace {old} failed, downstairs {client_id} is {:?}", @@ -2624,7 +2656,13 @@ impl Downstairs { client_id: ClientId, up_state: &UpstairsState, ) { - assert_eq!(self.clients[client_id].state(), DsState::Offline); + assert!(matches!( + self.clients[client_id].state(), + DsState::Connecting { + mode: ConnectionMode::Offline, + .. 
+ } + )); let byte_count = self.clients[client_id].total_bytes_outstanding(); let work_count = self.clients[client_id].total_live_work(); @@ -2710,33 +2748,26 @@ impl Downstairs { fn abort_repair(&mut self, up_state: &UpstairsState) { assert!(self.clients.iter().any(|c| matches!( c.state(), - DsState::LiveRepair | - // If connection aborted, and restarted, then the re-negotiation - // could have won this race, and transitioned the reconnecting - // downstairs from LiveRepair to Faulted to LiveRepairReady. - DsState::LiveRepairReady | + DsState::LiveRepair // If just a single IO reported failure, we will fault this // downstairs and it won't yet have had a chance to move back // around to LiveRepairReady yet. - DsState::Faulted | + // XXX can we actually get here? + | DsState::Connecting { + mode: ConnectionMode::Faulted, + .. + } // It's also possible for a Downstairs to be in the process of // stopping, due a fault or disconnection - DsState::Stopping(..) // XXX should we be more specific here? + | DsState::Stopping(..) // XXX should we be more specific here? ))); for i in ClientId::iter() { - match self.clients[i].state() { - DsState::LiveRepair => { - self.fault_client( - i, - up_state, - ClientFaultReason::FailedLiveRepair, - ); - } - DsState::LiveRepairReady => { - // TODO I don't think this is necessary - self.skip_all_jobs(i); - } - _ => {} + if matches!(self.clients[i].state(), DsState::LiveRepair) { + self.fault_client( + i, + up_state, + ClientFaultReason::FailedLiveRepair, + ); } // Set repair_info to None, so that the next ExtentFlushClose sees // it empty (as expected). 
repair_info is set on all clients, even @@ -3231,8 +3262,12 @@ impl Downstairs { */ let ds_state = self.clients[client_id].state(); match ds_state { - DsState::Active | DsState::Reconcile | DsState::LiveRepair => {} - DsState::Faulted => { + DsState::Active | DsState::LiveRepair => {} + DsState::Stopping(ClientStopReason::Fault(..)) + | DsState::Connecting { + mode: ConnectionMode::Faulted, + .. + } => { error!( self.clients[client_id].log, "Dropping job {}, this downstairs is faulted", ds_id, @@ -3590,14 +3625,6 @@ impl Downstairs { let mut ds = Self::test_default(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( - &UpstairsState::Active, - DsState::WaitActive, - ); - ds.clients[cid].checked_state_transition( - &UpstairsState::Active, - DsState::WaitQuorum, - ); ds.clients[cid].checked_state_transition( &UpstairsState::Active, DsState::Active, @@ -3621,11 +3648,10 @@ impl Downstairs { // Set one of the clients to want a repair let to_repair = ClientId::new(1); - ds.clients[to_repair] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - ds.clients[to_repair].checked_state_transition( + ds.fault_client( + to_repair, &UpstairsState::Active, - DsState::LiveRepairReady, + ClientFaultReason::RequestedFault, ); ds.clients[to_repair].checked_state_transition( &UpstairsState::Active, @@ -4347,7 +4373,7 @@ struct DownstairsBackpressureConfig { pub(crate) mod test { use super::{ ClientFaultReason, ClientNegotiationFailed, ClientStopReason, - Downstairs, PendingJob, + ConnectionMode, Downstairs, NegotiationState, PendingJob, }; use crate::{ downstairs::{LiveRepairData, LiveRepairState, ReconcileData}, @@ -4409,21 +4435,48 @@ pub(crate) mod test { } } - fn set_all_reconcile(ds: &mut Downstairs) { - for i in ClientId::iter() { - ds.clients[i].checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitActive, - ); - ds.clients[i].checked_state_transition( - &UpstairsState::Initializing, - DsState::WaitQuorum, - ); - 
ds.clients[i].checked_state_transition( - &UpstairsState::Initializing, - DsState::Reconcile, + /// Helper function to legally move the given client to live-repair + fn move_to_live_repair(ds: &mut Downstairs, to_repair: ClientId) { + ds.fault_client( + to_repair, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + let mode = ConnectionMode::Faulted; + for state in [ + NegotiationState::Start { auto_promote: true }, + NegotiationState::WaitForPromote, + NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::LiveRepairReady, + ] { + ds.clients[to_repair].checked_state_transition( + &UpstairsState::Active, + DsState::Connecting { state, mode }, ); } + ds.clients[to_repair].checked_state_transition( + &UpstairsState::Active, + DsState::LiveRepair, + ); + } + + fn set_all_reconcile(ds: &mut Downstairs) { + let mode = ConnectionMode::New; + for cid in ClientId::iter() { + for state in [ + NegotiationState::Start { auto_promote: true }, + NegotiationState::WaitForPromote, + NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::Reconcile, + ] { + ds.clients[cid].checked_state_transition( + &UpstairsState::Active, + DsState::Connecting { state, mode }, + ); + } + } } #[test] @@ -6045,7 +6098,6 @@ pub(crate) mod test { fn send_next_reconciliation_req_none() { // No repairs on the queue, should return None let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); ds.reconcile = Some(ReconcileData::new([])); @@ -6081,6 +6133,7 @@ pub(crate) mod test { }, ), ])); + set_all_reconcile(&mut ds); // Send the first reconciliation req assert!(!ds.send_next_reconciliation_req()); @@ -6116,7 +6169,6 @@ pub(crate) mod test { // in the FailedReconcile state. Verify that attempts to get new work // after a failed repair now return none. 
let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let rep_id = ReconciliationId(0); @@ -6158,7 +6210,6 @@ pub(crate) mod test { fn reconcile_rep_in_progress_bad1() { // Verify the same downstairs can't mark a job in progress twice let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let rep_id = ReconciliationId(0); ds.reconcile = Some(ReconcileData::new([ReconcileIO::new( @@ -6177,7 +6228,6 @@ pub(crate) mod test { #[test] fn reconcile_repair_workflow_1() { let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let close_id = ReconciliationId(0); @@ -6225,7 +6275,6 @@ pub(crate) mod test { fn reconcile_repair_workflow_2() { // Verify Done or Skipped works when checking for a complete repair let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let rep_id = ReconciliationId(1); @@ -6269,7 +6318,6 @@ pub(crate) mod test { fn reconcile_repair_inprogress_not_done() { // Verify Done or Skipped works when checking for a complete repair let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let rep_id = ReconciliationId(1); @@ -6304,7 +6352,6 @@ pub(crate) mod test { // Verify we can't start a new job before the old is finished. // Verify Done or Skipped works when checking for a complete repair let mut ds = Downstairs::test_default(); - set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let close_id = ReconciliationId(0); @@ -6359,8 +6406,11 @@ pub(crate) mod test { ds.force_active(); // Mark client 1 as faulted - ds.clients[ClientId::new(1)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + ds.fault_client( + ClientId::new(1), + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); // Create a write, enqueue it on both the downstairs // and the guest work queues. 
@@ -6410,11 +6460,14 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); - // Mark client 1 as faulted - ds.clients[ClientId::new(1)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - ds.clients[ClientId::new(2)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + // Mark clients 1 and 2 as faulted + for cid in [ClientId::new(1), ClientId::new(2)] { + ds.fault_client( + cid, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + } // Create a write, enqueue it on both the downstairs // and the guest work queues. @@ -6452,8 +6505,11 @@ pub(crate) mod test { // will result in an error back to the guest. let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(2)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + ds.fault_client( + ClientId::new(2), + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); // Create a write, enqueue it on both the downstairs // and the guest work queues. @@ -6493,8 +6549,11 @@ pub(crate) mod test { // from acking back OK for a flush to the guest. let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(1)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + ds.fault_client( + ClientId::new(1), + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); // Create a flush, enqueue it on both the downstairs // and the guest work queues. @@ -6527,10 +6586,13 @@ pub(crate) mod test { // back to the guest. 
let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(1)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - ds.clients[ClientId::new(2)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + for cid in [ClientId::new(1), ClientId::new(2)] { + ds.fault_client( + cid, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + } // Create a flush, enqueue it on both the downstairs // and the guest work queues. @@ -6552,8 +6614,11 @@ pub(crate) mod test { // error back to the guest. let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(0)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + ds.fault_client( + ClientId::new(0), + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); // Create a flush, enqueue it on both the downstairs // and the guest work queues. @@ -7363,8 +7428,11 @@ pub(crate) mod test { // downstairs has failed. One write, one read, and one flush. let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(0)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + ds.fault_client( + ClientId::new(0), + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); // Create a write let write_one = ds.create_and_enqueue_generic_write_eob(false); @@ -7475,10 +7543,13 @@ pub(crate) mod test { // one downstairs. 
let mut ds = Downstairs::test_default(); ds.force_active(); - ds.clients[ClientId::new(0)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - ds.clients[ClientId::new(2)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + for cid in [ClientId::new(0), ClientId::new(2)] { + ds.fault_client( + cid, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + } // Create a write let write_one = ds.create_and_enqueue_generic_write_eob(false); @@ -7571,9 +7642,10 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( + ds.fault_client( + cid, &UpstairsState::Active, - DsState::Faulted, + ClientFaultReason::RequestedFault, ); } @@ -7605,9 +7677,10 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( + ds.fault_client( + cid, &UpstairsState::Active, - DsState::Faulted, + ClientFaultReason::RequestedFault, ); } @@ -7637,9 +7710,10 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( + ds.fault_client( + cid, &UpstairsState::Active, - DsState::Faulted, + ClientFaultReason::RequestedFault, ); } @@ -7667,9 +7741,10 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( + ds.fault_client( + cid, &UpstairsState::Active, - DsState::Faulted, + ClientFaultReason::RequestedFault, ); } @@ -7720,9 +7795,10 @@ pub(crate) mod test { let mut ds = Downstairs::test_default(); ds.force_active(); for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( + ds.fault_client( + cid, &UpstairsState::Active, - DsState::Faulted, + ClientFaultReason::RequestedFault, ); } @@ -9642,19 +9718,7 @@ pub(crate) mod test { // 
Fault the downstairs let to_repair = ClientId::new(1); - ds.fault_client( - to_repair, - &UpstairsState::Active, - ClientFaultReason::RequestedFault, - ); - for s in [ - DsState::Faulted, - DsState::LiveRepairReady, - DsState::LiveRepair, - ] { - ds.clients[to_repair] - .checked_state_transition(&UpstairsState::Active, s); - } + move_to_live_repair(&mut ds, to_repair); let next_id = ds.peek_next_id().0; ds.repair = Some(LiveRepairData { @@ -9811,19 +9875,7 @@ pub(crate) mod test { // Fault the downstairs let to_repair = ClientId::new(1); - ds.fault_client( - to_repair, - &UpstairsState::Active, - ClientFaultReason::RequestedFault, - ); - for s in [ - DsState::Faulted, - DsState::LiveRepairReady, - DsState::LiveRepair, - ] { - ds.clients[to_repair] - .checked_state_transition(&UpstairsState::Active, s); - } + move_to_live_repair(&mut ds, to_repair); let next_id = ds.peek_next_id().0; @@ -9972,10 +10024,6 @@ pub(crate) mod test { &UpstairsState::Active, ClientFaultReason::RequestedFault, ); - for s in [DsState::Faulted, DsState::LiveRepairReady] { - ds.clients[to_repair] - .checked_state_transition(&UpstairsState::Active, s); - } // Start the repair normally. 
This enqueues the close & reopen jobs, and // reserves Job IDs for the repair/noop diff --git a/upstairs/src/dummy_downstairs_tests.rs b/upstairs/src/dummy_downstairs_tests.rs index fb7254fbd..984d12d69 100644 --- a/upstairs/src/dummy_downstairs_tests.rs +++ b/upstairs/src/dummy_downstairs_tests.rs @@ -12,8 +12,10 @@ use crate::guest::Guest; use crate::up_main; use crate::BlockIO; use crate::Buffer; +use crate::ConnectionMode; use crate::CrucibleError; use crate::DsState; +use crate::NegotiationState; use crate::{ IO_CACHED_MAX_BYTES, IO_CACHED_MAX_JOBS, IO_OUTSTANDING_MAX_BYTES, IO_OUTSTANDING_MAX_JOBS, @@ -1562,7 +1564,13 @@ async fn test_byte_fault_condition() { // Check to make sure that happened let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Faulted); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Faulted, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -1633,7 +1641,13 @@ async fn test_byte_fault_condition_offline() { // Check to make sure that happened let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -1662,11 +1676,23 @@ async fn test_byte_fault_condition_offline() { let ds = harness.guest.downstairs_state().await.unwrap(); if (i + 1) * WRITE_SIZE < IO_OUTSTANDING_MAX_BYTES as usize { - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); 
assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); } else { - assert_eq!(ds[ClientId::new(0)], DsState::Faulted); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Faulted, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); } @@ -1711,7 +1737,13 @@ async fn test_offline_can_deactivate() { // Check to make sure downstairs 1 is now offline. let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -1748,7 +1780,13 @@ async fn test_offline_with_io_can_deactivate() { // Check to make sure downstairs 1 is now offline. let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -1799,9 +1837,15 @@ async fn test_all_offline_with_io_can_deactivate() { // Check to make sure all downstairs are offline. 
let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); - assert_eq!(ds[ClientId::new(1)], DsState::Offline); - assert_eq!(ds[ClientId::new(2)], DsState::Offline); + for cid in ClientId::iter() { + assert_eq!( + ds[cid], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); + } // We must `spawn` here because `read` will wait for the response to // come back before returning @@ -1898,7 +1942,13 @@ async fn test_job_fault_condition() { // Check to make sure that happened let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Faulted); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Faulted, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -1964,7 +2014,13 @@ async fn test_job_fault_condition_offline() { // Check to make sure that happened let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); @@ -2007,12 +2063,24 @@ async fn test_job_fault_condition_offline() { let ds = harness.guest.downstairs_state().await.unwrap(); if i + barrier_count < IO_OUTSTANDING_MAX_JOBS { // At this point, we should still be offline - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); } else { 
// After ds1 is kicked out, we shouldn't see any more messages - assert_eq!(ds[ClientId::new(0)], DsState::Faulted); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Faulted, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); } @@ -2752,7 +2820,13 @@ async fn test_no_send_offline() { // Check to make sure that happened let ds = harness.guest.downstairs_state().await.unwrap(); - assert_eq!(ds[ClientId::new(0)], DsState::Offline); + assert_eq!( + ds[ClientId::new(0)], + DsState::Connecting { + mode: ConnectionMode::Offline, + state: NegotiationState::Start { auto_promote: true } + } + ); assert_eq!(ds[ClientId::new(1)], DsState::Active); assert_eq!(ds[ClientId::new(2)], DsState::Active); info!(harness.log, "DS1 is offline"); diff --git a/upstairs/src/lib.rs b/upstairs/src/lib.rs index e5a105e79..b409cc140 100644 --- a/upstairs/src/lib.rs +++ b/upstairs/src/lib.rs @@ -67,6 +67,7 @@ mod stats; pub use client::{ ClientFaultReason, ClientNegotiationFailed, ClientStopReason, + NegotiationState, }; pub use crucible_common::impacted_blocks::*; @@ -790,89 +791,78 @@ pub(crate) struct RawReadResponse { #[serde(rename_all = "snake_case")] #[serde(tag = "type", content = "value")] pub enum DsState { - /* - * New connection - */ - New, - /* - * Waiting for activation signal. - */ - WaitActive, - /* - * Waiting for the minimum number of downstairs to be present. - */ - WaitQuorum, - /* - * Initial startup, downstairs are repairing from each other. 
- */ - Reconcile, - /* - * Ready for and/or currently receiving IO - */ + /// New connection + Connecting { + state: NegotiationState, + mode: ConnectionMode, + }, + + /// Ready for and/or currently receiving IO Active, - /* - * IO attempts to this downstairs are failing at too high of a - * rate, or it is not able to keep up, or it is having some - * error such that we can no longer use it. - */ - Faulted, - /* - * This downstairs was failed, but has disconnected and now we - * are ready to repair it. - */ - LiveRepairReady, - /* - * This downstairs is undergoing LiveRepair - */ + + /// This downstairs is undergoing LiveRepair LiveRepair, - /* - * This downstairs was active, but is now no longer connected. - * We may have work for it in memory, so a replay is possible - * if this downstairs reconnects in time. - */ - Offline, - /* - * The current downstairs tasks have ended and the replacement has - * begun. - */ - Replaced, /// The IO task for the client is being stopped - Stopping(crate::client::ClientStopReason), + Stopping(ClientStopReason), } impl std::fmt::Display for DsState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - DsState::New => { - write!(f, "New") - } - DsState::WaitActive => { + DsState::Connecting { + state: NegotiationState::WaitActive, + .. + } => { write!(f, "WaitActive") } - DsState::WaitQuorum => { + DsState::Connecting { + state: NegotiationState::WaitQuorum, + .. + } => { write!(f, "WaitQuorum") } - DsState::Reconcile => { + DsState::Connecting { + state: NegotiationState::Reconcile, + .. + } => { write!(f, "Reconcile") } + DsState::Connecting { + state: NegotiationState::LiveRepairReady, + .. + } => { + write!(f, "LiveRepairReady") + } DsState::Active => { write!(f, "Active") } - DsState::Faulted => { - write!(f, "Faulted") - } - DsState::LiveRepairReady => { - write!(f, "LiveRepairReady") + DsState::Connecting { + mode: ConnectionMode::New, + .. 
+ } => { + write!(f, "New") } - DsState::LiveRepair => { - write!(f, "LiveRepair") + DsState::Connecting { + mode: ConnectionMode::Faulted, + .. + } => { + write!(f, "Faulted") } - DsState::Offline => { + DsState::Connecting { + mode: ConnectionMode::Offline, + .. + } => { write!(f, "Offline") } - DsState::Replaced => { + DsState::Connecting { + mode: ConnectionMode::Replaced, + .. + } => { write!(f, "Replaced") } + DsState::LiveRepair => { + write!(f, "LiveRepair") + } DsState::Stopping(..) => { write!(f, "Stopping") } @@ -880,6 +870,15 @@ impl std::fmt::Display for DsState { } } +#[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum ConnectionMode { + Offline, + New, + Faulted, + Replaced, +} + /* * A unit of work for downstairs that is put into the hashmap. */ diff --git a/upstairs/src/live_repair.rs b/upstairs/src/live_repair.rs index 543d8c3e9..a45d5f4ac 100644 --- a/upstairs/src/live_repair.rs +++ b/upstairs/src/live_repair.rs @@ -1109,10 +1109,26 @@ pub mod repair_test { assert_eq!(up.downstairs.last_repair_extent(), None); assert!(up.downstairs.repair().is_none()); - // Start the LiveRepair - let client = &mut up.downstairs.clients[ClientId::new(1)]; - client.checked_state_transition(&up.state, DsState::Faulted); - client.checked_state_transition(&up.state, DsState::LiveRepairReady); + // Start the LiveRepair, manually walking through states + let client = ClientId::new(1); + up.downstairs.fault_client( + client, + &up.state, + ClientFaultReason::RequestedFault, + ); + let mode = ConnectionMode::Faulted; + for state in [ + NegotiationState::Start { auto_promote: true }, + NegotiationState::WaitForPromote, + NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::LiveRepairReady, + ] { + up.downstairs.clients[client].checked_state_transition( + &up.state, + DsState::Connecting { state, mode }, + ); + } up.on_repair_check(); 
assert!(up.downstairs.live_repair_in_progress()); assert_eq!(up.downstairs.last_repair_extent(), Some(ExtentId(0))); diff --git a/upstairs/src/upstairs.rs b/upstairs/src/upstairs.rs index 8c7c71a20..7366ad297 100644 --- a/upstairs/src/upstairs.rs +++ b/upstairs/src/upstairs.rs @@ -2,7 +2,9 @@ //! Data structures specific to Crucible's `struct Upstairs` use crate::{ cdt, - client::{ClientAction, ClientRunResult, ClientStopReason}, + client::{ + ClientAction, ClientRunResult, ClientStopReason, NegotiationState, + }, control::ControlRequest, deferred::{ DeferredBlockOp, DeferredMessage, DeferredQueue, DeferredRead, @@ -12,9 +14,9 @@ use crate::{ extent_from_offset, io_limits::IOLimitGuard, stats::UpStatOuter, - BlockOp, BlockRes, Buffer, ClientId, ClientMap, CrucibleOpts, DsState, - EncryptionContext, GuestIoHandle, Message, RegionDefinition, - RegionDefinitionStatus, SnapshotDetails, WQCounts, + BlockOp, BlockRes, Buffer, ClientId, ClientMap, ConnectionMode, + CrucibleOpts, DsState, EncryptionContext, GuestIoHandle, Message, + RegionDefinition, RegionDefinitionStatus, SnapshotDetails, WQCounts, }; use crucible_client_types::RegionExtentInfo; use crucible_common::{BlockIndex, CrucibleError}; @@ -616,13 +618,21 @@ impl Upstairs { if matches!(&self.state, UpstairsState::Deactivating(..)) { info!(self.log, "checking for deactivation"); for i in ClientId::iter() { + debug!( + self.log, + "client {i} has state {:?}", + self.downstairs.clients[i].state() + ); // Clients become Stopping, then New (when the IO task // completes and the client is restarted). We don't try to // deactivate them _again_ in such cases. if matches!( self.downstairs.clients[i].state(), DsState::Stopping(ClientStopReason::Deactivated) - | DsState::New + | DsState::Connecting { + mode: ConnectionMode::New, + .. 
+ } ) { debug!(self.log, "already deactivated {i}"); } else if self.downstairs.try_deactivate(i, &self.state) { @@ -729,7 +739,13 @@ impl Upstairs { } for cid in ClientId::iter() { - if self.downstairs.clients[cid].state() == DsState::Offline { + if matches!( + self.downstairs.clients[cid].state(), + DsState::Connecting { + mode: ConnectionMode::Offline, + .. + } + ) { self.downstairs.check_gone_too_long(cid, &self.state); } } @@ -893,11 +909,15 @@ impl Upstairs { // before we begin a live repair. let repair_in_progress = self.downstairs.live_repair_in_progress(); - let any_in_repair_ready = self - .downstairs - .clients - .iter() - .any(|c| c.state() == DsState::LiveRepairReady); + let any_in_repair_ready = self.downstairs.clients.iter().any(|c| { + matches!( + c.state(), + DsState::Connecting { + state: NegotiationState::LiveRepairReady, + .. + } + ) + }); if repair_in_progress { info!(self.log, "Live Repair already running"); @@ -1151,10 +1171,9 @@ impl Upstairs { #[cfg(test)] BlockOp::GetDownstairsState { done } => { - let mut out = crate::ClientData::new(DsState::New); - for i in ClientId::iter() { - out[i] = self.downstairs.clients[i].state(); - } + let out = crate::ClientData::from_fn(|i| { + self.downstairs.clients[i].state() + }); done.send_ok(out); } @@ -1702,7 +1721,10 @@ impl Upstairs { match self.downstairs.clients[client_id].state() { DsState::Active => (), - DsState::WaitQuorum => { + DsState::Connecting { + state: NegotiationState::WaitQuorum, + .. + } => { // See if we have a quorum if self.connect_region_set() { // We connected normally, so there's no need @@ -1711,7 +1733,10 @@ impl Upstairs { } } - DsState::LiveRepairReady => { + DsState::Connecting { + state: NegotiationState::LiveRepairReady, + .. 
+ } => { // Immediately check for live-repair self.repair_check_deadline = Some(Instant::now()); @@ -1743,7 +1768,7 @@ impl Upstairs { &self.state, ) { // reconciliation is done, great work everyone - self.on_reconciliation_done(DsState::Reconcile); + self.on_reconciliation_done(true); } } @@ -1859,12 +1884,16 @@ impl Upstairs { * Make sure all downstairs are in the correct state before we * proceed. */ - let not_ready = self - .downstairs - .clients - .iter() - .any(|c| c.state() != DsState::WaitQuorum); - if not_ready { + let ready = self.downstairs.clients.iter().all(|c| { + matches!( + c.state(), + DsState::Connecting { + state: NegotiationState::WaitQuorum, + .. + } + ) + }); + if !ready { info!(self.log, "Waiting for more clients to be ready"); return false; } @@ -1901,7 +1930,7 @@ impl Upstairs { } Ok(false) => { info!(self.log, "No downstairs reconciliation required"); - self.on_reconciliation_done(DsState::WaitQuorum); + self.on_reconciliation_done(false); info!(self.log, "Set Active after no reconciliation"); true } @@ -1909,10 +1938,10 @@ impl Upstairs { } /// Called when reconciliation is complete - fn on_reconciliation_done(&mut self, from_state: DsState) { + fn on_reconciliation_done(&mut self, did_work: bool) { // This should only ever be called if reconciliation completed // successfully; make some assertions to that effect. - self.downstairs.on_reconciliation_done(from_state); + self.downstairs.on_reconciliation_done(did_work); info!(self.log, "All required reconciliation work is completed"); info!( @@ -2098,7 +2127,7 @@ impl Upstairs { pub(crate) mod test { use super::*; use crate::{ - client::ClientStopReason, + client::{ClientFaultReason, ClientStopReason}, test::{make_encrypted_upstairs, make_upstairs}, Block, BlockOp, BlockOpWaiter, DsState, JobId, }; @@ -2134,9 +2163,7 @@ pub(crate) mod test { let mut up = create_test_upstairs(); // Move our downstairs client fail_id to LiveRepair. 
- let client = &mut up.downstairs.clients[or_ds]; - client.checked_state_transition(&up.state, DsState::Faulted); - client.checked_state_transition(&up.state, DsState::LiveRepairReady); + to_live_repair_ready(&mut up, or_ds); // Start repairing the downstairs; this also enqueues the jobs up.apply(UpstairsAction::RepairCheck); @@ -2146,7 +2173,7 @@ pub(crate) mod test { assert!(up.repair_check_deadline.is_none()); assert!(up.downstairs.live_repair_in_progress()); - // The first thing that should happen after we start repair_exetnt + // The first thing that should happen after we start repair_extent // is two jobs show up on the work queue, one for close and one for // the eventual re-open. Wait here for those jobs to show up on the // work queue before returning. @@ -2155,15 +2182,49 @@ pub(crate) mod test { up } + /// Helper function to legally move the given client to live-repair ready + fn to_live_repair_ready(up: &mut Upstairs, to_repair: ClientId) { + up.downstairs.fault_client( + to_repair, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + let mode = ConnectionMode::Faulted; + for state in [ + NegotiationState::Start { auto_promote: true }, + NegotiationState::WaitForPromote, + NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::LiveRepairReady, + ] { + up.downstairs.clients[to_repair].checked_state_transition( + &UpstairsState::Active, + DsState::Connecting { state, mode }, + ); + } + } + #[test] fn reconcile_not_ready() { // Verify reconcile returns false when a downstairs is not ready let mut up = Upstairs::test_default(None); - up.ds_transition(ClientId::new(0), DsState::WaitActive); - up.ds_transition(ClientId::new(0), DsState::WaitQuorum); - - up.ds_transition(ClientId::new(1), DsState::WaitActive); - up.ds_transition(ClientId::new(1), DsState::WaitQuorum); + for cid in [ClientId::new(0), ClientId::new(1)] { + for state in [ + NegotiationState::WaitActive, + NegotiationState::WaitForPromote, 
+ NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::WaitQuorum, + ] { + up.ds_transition( + cid, + DsState::Connecting { + mode: ConnectionMode::New, + state, + }, + ); + } + } let res = up.connect_region_set(); assert!(!res); @@ -2244,7 +2305,15 @@ pub(crate) mod test { })); // This causes the downstairs state to be reinitialized - assert_eq!(up.ds_state(client_id), DsState::New); + assert_eq!( + up.ds_state(client_id), + DsState::Connecting { + state: NegotiationState::Start { + auto_promote: false + }, + mode: ConnectionMode::New + } + ); if client_id.get() < 2 { assert!(matches!(up.state, UpstairsState::Deactivating { .. })); @@ -3414,8 +3483,7 @@ pub(crate) mod test { up.force_active().unwrap(); // Force client 1 into LiveRepairReady - up.ds_transition(ClientId::new(1), DsState::Faulted); - up.ds_transition(ClientId::new(1), DsState::LiveRepairReady); + to_live_repair_ready(&mut up, ClientId::new(1)); up.on_repair_check(); assert!(up.repair_check_deadline.is_none()); assert!(up.downstairs.live_repair_in_progress()); @@ -3434,10 +3502,9 @@ pub(crate) mod test { let mut up = Upstairs::test_default(Some(ddef)); up.force_active().unwrap(); + // Force clients 1 and 2 into LiveRepairReady for i in [1, 2].into_iter().map(ClientId::new) { - // Force client 1 into LiveRepairReady - up.ds_transition(i, DsState::Faulted); - up.ds_transition(i, DsState::LiveRepairReady); + to_live_repair_ready(&mut up, i); } up.on_repair_check(); assert!(up.repair_check_deadline.is_none()); @@ -3459,8 +3526,7 @@ pub(crate) mod test { let mut up = Upstairs::test_default(Some(ddef)); up.force_active().unwrap(); - up.ds_transition(ClientId::new(1), DsState::Faulted); - up.ds_transition(ClientId::new(1), DsState::LiveRepairReady); + to_live_repair_ready(&mut up, ClientId::new(1)); up.ds_transition(ClientId::new(1), DsState::LiveRepair); // Start the live-repair @@ -3471,8 +3537,7 @@ pub(crate) mod test { // Pretend that DS 0 faulted then came back 
through to LiveRepairReady; // we won't halt the existing repair, but will configure // repair_check_deadline to check again in the future. - up.ds_transition(ClientId::new(0), DsState::Faulted); - up.ds_transition(ClientId::new(0), DsState::LiveRepairReady); + to_live_repair_ready(&mut up, ClientId::new(0)); up.on_repair_check(); assert!(up.downstairs.live_repair_in_progress()); @@ -3488,8 +3553,7 @@ pub(crate) mod test { let mut up = Upstairs::test_default(Some(ddef)); up.force_active().unwrap(); - up.ds_transition(ClientId::new(1), DsState::Faulted); - up.ds_transition(ClientId::new(1), DsState::LiveRepairReady); + to_live_repair_ready(&mut up, ClientId::new(1)); up.on_repair_check(); assert!(up.repair_check_deadline.is_none()); @@ -3648,7 +3712,15 @@ pub(crate) mod test { // Verify after the ds_missing, all downstairs are New for c in up.downstairs.clients.iter() { - assert_eq!(c.state(), DsState::New); + assert_eq!( + c.state(), + DsState::Connecting { + mode: ConnectionMode::New, + state: NegotiationState::Start { + auto_promote: false + } + } + ); } } @@ -4358,12 +4430,13 @@ pub(crate) mod test { let mut up = make_upstairs(); up.force_active().unwrap(); - up.downstairs.clients[ClientId::new(0)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - up.downstairs.clients[ClientId::new(1)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); - up.downstairs.clients[ClientId::new(2)] - .checked_state_transition(&UpstairsState::Active, DsState::Faulted); + for i in ClientId::iter() { + up.downstairs.fault_client( + i, + &UpstairsState::Active, + ClientFaultReason::RequestedFault, + ); + } let data = Buffer::new(1, 512); let offset = BlockIndex(7); From 671ab544fb0c7fa1d427699cf5e7efc971b04254 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 25 Nov 2024 12:18:59 -0500 Subject: [PATCH 02/21] Remove needs_replay in favor of a returned value --- upstairs/src/client.rs | 94 +++++++++++++++++--------------------- 
upstairs/src/downstairs.rs | 37 +++++---------- upstairs/src/upstairs.rs | 61 ++++++++++--------------- 3 files changed, 78 insertions(+), 114 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index dbdcde210..63e0b1feb 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -142,9 +142,6 @@ pub(crate) struct DownstairsClient { /// This is set to `None` during initialization pub(crate) repair_addr: Option, - /// Flag indicating that the Upstairs should replay jobs to this client - needs_replay: bool, - /// TLS context (if present) /// /// This is passed as a pointer to minimize copies @@ -212,7 +209,6 @@ impl DownstairsClient { client_id, io_limits, region_uuid: None, - needs_replay: false, tls_context, log, target_addr, @@ -258,7 +254,6 @@ impl DownstairsClient { crate::IO_OUTSTANDING_MAX_BYTES as usize * 3 / 2, ), region_uuid: None, - needs_replay: false, tls_context: None, log: crucible_common::build_logger(), target_addr: None, @@ -326,7 +321,6 @@ impl DownstairsClient { warn!(self.log, "failed to send stop request") } self.checked_state_transition(up_state, DsState::Stopping(r)); - debug!(self.log, "NEW IO STATE {:?}", self.state); } else { warn!(self.log, "client task is already stopping") } @@ -574,7 +568,6 @@ impl DownstairsClient { // entirely; the repair address could have changed in any of these // cases. 
self.repair_addr = None; - self.needs_replay = false; // If the upstairs is already active (or trying to go active), then the // downstairs should automatically call PromoteToActive when it reaches @@ -630,16 +623,6 @@ impl DownstairsClient { self.start_task(true, auto_promote); } - /// Sets the `needs_replay` flag - pub(crate) fn needs_replay(&mut self) { - self.needs_replay = true; - } - - /// Returns and clears the `needs_replay` flag - pub(crate) fn check_replay(&mut self) -> bool { - std::mem::take(&mut self.needs_replay) - } - /// Returns the last flush ID handled by this client pub(crate) fn last_flush(&self) -> JobId { self.last_flush @@ -1191,28 +1174,29 @@ impl DownstairsClient { /// Skips from `LiveRepairReady` to `Active`; a no-op otherwise /// /// # Panics - /// If this downstairs is not read-only, or we weren't previously in - /// `NegotiationState::LiveRepairReady` + /// If this downstairs is not read-only pub(crate) fn skip_live_repair(&mut self, up_state: &UpstairsState) { let DsState::Connecting { state, .. } = self.state else { - panic!("invalid call to SkipLiveRepair"); + return; }; - assert_eq!(state, NegotiationState::LiveRepairReady); - assert!(self.cfg.read_only); + if state == NegotiationState::LiveRepairReady { + assert!(self.cfg.read_only); - // TODO: could we do this transition early, by automatically - // skipping LiveRepairReady if read-only? - self.checked_state_transition(up_state, DsState::Active); - self.stats.ro_lr_skipped += 1; + // TODO: could we do this transition early, by automatically + // skipping LiveRepairReady if read-only? + self.checked_state_transition(up_state, DsState::Active); + self.stats.ro_lr_skipped += 1; + } } - /// Moves from `LiveRepairReady` to `LiveRepair`; panics otherwise + /// Moves from `LiveRepairReady` to `LiveRepair`, a no-op otherwise pub(crate) fn start_live_repair(&mut self, up_state: &UpstairsState) { let DsState::Connecting { state, .. 
} = self.state else { - panic!("invalid call to SkipLiveRepair"); + return; }; - assert_eq!(state, NegotiationState::LiveRepairReady); - self.checked_state_transition(up_state, DsState::LiveRepair); + if state == NegotiationState::LiveRepairReady { + self.checked_state_transition(up_state, DsState::LiveRepair); + } } /// Continues the negotiation and initial reconciliation process @@ -1220,13 +1204,14 @@ impl DownstairsClient { /// Returns an error if the upstairs should go inactive, which occurs if the /// error is at or after `Message::YouAreNowActive`. /// - /// Returns `true` if negotiation for this downstairs is complete + /// Returns a flag indicating how to proceed + #[must_use] pub(crate) fn continue_negotiation( &mut self, m: Message, up_state: &UpstairsState, ddef: &mut RegionDefinitionStatus, - ) -> Result { + ) -> Result { /* * Either we get all the way through the negotiation, or we hit the * timeout and exit to retry. @@ -1327,8 +1312,9 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); + return Ok(NegotiationResult::NotDone); }; + let mut out = NegotiationResult::NotDone; let mode = *mode; // mode is immutable here match m { Message::YesItsMe { @@ -1341,7 +1327,7 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); + return Ok(NegotiationResult::NotDone); }; if version != CRUCIBLE_MESSAGE_VERSION { error!( @@ -1354,7 +1340,7 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::Incompatible, ); - return Ok(false); + return Ok(NegotiationResult::NotDone); } self.repair_addr = Some(repair_addr); if auto_promote { @@ -1420,7 +1406,8 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); + // XXX why isn't this an error? 
+ return Ok(NegotiationResult::NotDone); } let match_uuid = self.cfg.upstairs_id == upstairs_id; @@ -1486,7 +1473,7 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); + return Ok(NegotiationResult::NotDone); } info!( self.log, @@ -1503,7 +1490,7 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::Incompatible, ); - return Ok(false); + return Ok(NegotiationResult::NotDone); } /* @@ -1648,7 +1635,8 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); // TODO should we trigger set_inactive? + // TODO should we trigger set_inactive? + return Ok(NegotiationResult::NotDone); } assert_eq!( mode, @@ -1662,9 +1650,10 @@ impl DownstairsClient { ); assert_eq!(self.last_flush, last_flush_number); - // Immediately set the state to Active, since we've already - // copied over the jobs. + // Immediately set the state to Active, and return a flag + // indicating that jobs should be replayed. self.checked_state_transition(up_state, DsState::Active); + out = NegotiationResult::Replay; } Message::ExtentVersions { gen_numbers, @@ -1677,14 +1666,17 @@ impl DownstairsClient { up_state, ClientNegotiationFailed::BadNegotiationOrder, ); - return Ok(false); // TODO should we trigger set_inactive? + // TODO should we trigger set_inactive? + return Ok(NegotiationResult::NotDone); } match mode { ConnectionMode::New => { *state = NegotiationState::WaitQuorum; + out = NegotiationResult::WaitQuorum; } ConnectionMode::Faulted | ConnectionMode::Replaced => { *state = NegotiationState::LiveRepairReady; + out = NegotiationResult::LiveRepair; } ConnectionMode::Offline => { panic!( @@ -1711,15 +1703,7 @@ impl DownstairsClient { } m => panic!("invalid message in continue_negotiation: {m:?}"), } - // TODO: reconsider this part? - Ok(matches!( - self.state, - DsState::Connecting { - state: NegotiationState::WaitQuorum // new - | NegotiationState::LiveRepairReady, // live-repair - .. 
- } | DsState::Active // replay - )) + Ok(out) } /// Sends the next reconciliation job to all clients @@ -1880,6 +1864,14 @@ pub enum NegotiationState { LiveRepairReady, } +/// Result value returned when negotiation is complete +pub(crate) enum NegotiationResult { + NotDone, + WaitQuorum, + Replay, + LiveRepair, +} + /// Result value from [`DownstairsClient::enqueue`] pub(crate) enum EnqueueResult { /// The given job should be marked as in progress and sent diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index cbcda81ef..d9e3f63b7 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -686,18 +686,6 @@ impl Downstairs { }) { self.abort_reconciliation(up_state); } - - // If this client is coming back from being offline, then mark that its - // jobs must be replayed when it completes negotiation. - if matches!( - self.clients[client_id].state(), - DsState::Connecting { - mode: ConnectionMode::Offline, - .. - } - ) { - self.clients[client_id].needs_replay(); - } } /// Returns true if we can deactivate immediately @@ -808,17 +796,10 @@ impl Downstairs { self.next_id } - /// Sends replay jobs to the given client if `needs_replay` is set - pub(crate) fn check_replay(&mut self, client_id: ClientId) { - if self.clients[client_id].check_replay() { - self.replay_jobs(client_id); - } - } - /// Sends all pending jobs for the given client /// /// Jobs are pending if they have not yet been flushed by this client. 
- fn replay_jobs(&mut self, client_id: ClientId) { + pub(crate) fn replay_jobs(&mut self, client_id: ClientId) { let lf = self.clients[client_id].last_flush(); info!( @@ -4436,7 +4417,7 @@ pub(crate) mod test { } /// Helper function to legally move the given client to live-repair - fn move_to_live_repair(ds: &mut Downstairs, to_repair: ClientId) { + fn to_live_repair_ready(ds: &mut Downstairs, to_repair: ClientId) { ds.fault_client( to_repair, &UpstairsState::Active, @@ -4455,6 +4436,11 @@ pub(crate) mod test { DsState::Connecting { state, mode }, ); } + } + + /// Helper function to legally move the given client to live-repair + fn move_to_live_repair(ds: &mut Downstairs, to_repair: ClientId) { + to_live_repair_ready(ds, to_repair); ds.clients[to_repair].checked_state_transition( &UpstairsState::Active, DsState::LiveRepair, @@ -6169,6 +6155,7 @@ pub(crate) mod test { // in the FailedReconcile state. Verify that attempts to get new work // after a failed repair now return none. let mut ds = Downstairs::test_default(); + set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let rep_id = ReconciliationId(0); @@ -6228,6 +6215,7 @@ pub(crate) mod test { #[test] fn reconcile_repair_workflow_1() { let mut ds = Downstairs::test_default(); + set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let close_id = ReconciliationId(0); @@ -6275,6 +6263,7 @@ pub(crate) mod test { fn reconcile_repair_workflow_2() { // Verify Done or Skipped works when checking for a complete repair let mut ds = Downstairs::test_default(); + set_all_reconcile(&mut ds); let up_state = UpstairsState::Active; let rep_id = ReconciliationId(1); @@ -10019,11 +10008,7 @@ pub(crate) mod test { // Fault the downstairs let to_repair = ClientId::new(1); - ds.fault_client( - to_repair, - &UpstairsState::Active, - ClientFaultReason::RequestedFault, - ); + to_live_repair_ready(&mut ds, to_repair); // Start the repair normally. 
This enqueues the close & reopen jobs, and // reserves Job IDs for the repair/noop diff --git a/upstairs/src/upstairs.rs b/upstairs/src/upstairs.rs index 7366ad297..c8893b1d8 100644 --- a/upstairs/src/upstairs.rs +++ b/upstairs/src/upstairs.rs @@ -3,7 +3,8 @@ use crate::{ cdt, client::{ - ClientAction, ClientRunResult, ClientStopReason, NegotiationState, + ClientAction, ClientRunResult, ClientStopReason, NegotiationResult, + NegotiationState, }, control::ControlRequest, deferred::{ @@ -1706,45 +1707,24 @@ impl Upstairs { // continue_negotiation returns an error if the upstairs // should go inactive! Err(e) => self.set_inactive(e), - Ok(false) => (), - Ok(true) => { + Ok(NegotiationResult::NotDone) => (), + Ok(NegotiationResult::WaitQuorum) => { // Copy the region definition into the Downstairs self.downstairs.set_ddef(self.ddef.get_def().unwrap()); - - // Check to see whether we want to replay jobs (if the - // Downstairs is coming back from being Offline) - // TODO should we only do this in certain new states? - self.downstairs.check_replay(client_id); - - // Negotiation succeeded for this Downstairs, let's see - // what we can do from here - match self.downstairs.clients[client_id].state() { - DsState::Active => (), - - DsState::Connecting { - state: NegotiationState::WaitQuorum, - .. - } => { - // See if we have a quorum - if self.connect_region_set() { - // We connected normally, so there's no need - // to check for live-repair. - self.repair_check_deadline = None; - } - } - - DsState::Connecting { - state: NegotiationState::LiveRepairReady, - .. - } => { - // Immediately check for live-repair - self.repair_check_deadline = - Some(Instant::now()); - } - - s => panic!("bad state after negotiation: {s:?}"), + // See if we have a quorum + if self.connect_region_set() { + // We connected normally, so there's no need + // to check for live-repair. 
+ self.repair_check_deadline = None; } } + Ok(NegotiationResult::Replay) => { + self.downstairs.replay_jobs(client_id); + } + Ok(NegotiationResult::LiveRepair) => { + // Immediately check for live-repair + self.repair_check_deadline = Some(Instant::now()); + } } } @@ -2189,6 +2169,13 @@ pub(crate) mod test { &UpstairsState::Active, ClientFaultReason::RequestedFault, ); + // Restart the IO task (because we'll be faking messages from it) + up.apply(UpstairsAction::Downstairs(DownstairsAction::Client { + client_id: to_repair, + action: ClientAction::TaskStopped(ClientRunResult::RequestedStop( + ClientStopReason::Fault(ClientFaultReason::RequestedFault), + )), + })); let mode = ConnectionMode::Faulted; for state in [ NegotiationState::Start { auto_promote: true }, @@ -2198,7 +2185,7 @@ pub(crate) mod test { NegotiationState::LiveRepairReady, ] { up.downstairs.clients[to_repair].checked_state_transition( - &UpstairsState::Active, + &up.state, DsState::Connecting { state, mode }, ); } From 28df82064ac8656bce6822f151ad89724a18d6a9 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 25 Nov 2024 13:35:05 -0500 Subject: [PATCH 03/21] All tests passing --- upstairs/src/client.rs | 75 ------------------------------------------ 1 file changed, 75 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index 63e0b1feb..adaa98963 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -231,51 +231,6 @@ impl DownstairsClient { } } - /// Builds a minimal `DownstairsClient` for testing - /// - /// The resulting client has no target address; any packets sent by the - /// client will disappear into the void. 
- #[cfg(test)] - fn test_default() -> Self { - let client_delay_us = Arc::new(AtomicU64::new(0)); - let cfg = Arc::new(UpstairsConfig { - encryption_context: None, - upstairs_id: Uuid::new_v4(), - session_id: Uuid::new_v4(), - generation: std::sync::atomic::AtomicU64::new(1), - read_only: false, - }); - Self { - cfg, - client_task: Self::new_dummy_task(false), - client_id: ClientId::new(0), - io_limits: ClientIOLimits::new( - crate::IO_OUTSTANDING_MAX_JOBS * 3 / 2, - crate::IO_OUTSTANDING_MAX_BYTES as usize * 3 / 2, - ), - region_uuid: None, - tls_context: None, - log: crucible_common::build_logger(), - target_addr: None, - repair_addr: None, - state: DsState::Connecting { - mode: ConnectionMode::New, - state: NegotiationState::Start { - auto_promote: false, - }, - }, - last_flush: JobId(0), - stats: DownstairsStats::default(), - skipped_jobs: BTreeSet::new(), - region_metadata: None, - repair_info: None, - io_state_job_count: ClientIOStateCount::default(), - io_state_byte_count: ClientIOStateCount::default(), - connection_id: ConnectionId(0), - client_delay_us, - } - } - /// Choose which `ClientAction` to apply /// /// This function is called from within a top-level `select!`, so not only @@ -2674,33 +2629,3 @@ pub(crate) fn validate_unencrypted_read_response( } } } - -#[cfg(test)] -mod test { - use super::*; - - #[test] - #[should_panic] - fn downstairs_transition_deactivate_not_new() { - // Verify deactivate goes to new - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Stopping(ClientStopReason::Deactivated), - ); - } - - #[test] - #[should_panic] - fn downstairs_transition_same_active() { - let mut client = DownstairsClient::test_default(); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - client.checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); - } -} From 7a7a60e818a548e40e992666168860d360c827ed Mon Sep 17 
00:00:00 2001 From: Matt Keeter Date: Mon, 25 Nov 2024 13:44:18 -0500 Subject: [PATCH 04/21] Update OpenAPI stuff --- openapi/crucible-control.json | 152 +++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 37 deletions(-) diff --git a/openapi/crucible-control.json b/openapi/crucible-control.json index 4ff8f0e10..0854304bb 100644 --- a/openapi/crucible-control.json +++ b/openapi/crucible-control.json @@ -232,6 +232,15 @@ } ] }, + "ConnectionMode": { + "type": "string", + "enum": [ + "offline", + "new", + "faulted", + "replaced" + ] + }, "DownstairsWork": { "description": "`DownstairsWork` holds the information gathered from the downstairs", "type": "object", @@ -257,26 +266,44 @@ "DsState": { "oneOf": [ { + "description": "New connection", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "new" + "connecting" + ] + }, + "value": { + "type": "object", + "properties": { + "mode": { + "$ref": "#/components/schemas/ConnectionMode" + }, + "state": { + "$ref": "#/components/schemas/NegotiationState" + } + }, + "required": [ + "mode", + "state" ] } }, "required": [ - "type" + "type", + "value" ] }, { + "description": "Ready for and/or currently receiving IO", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "wait_active" + "active" ] } }, @@ -285,12 +312,13 @@ ] }, { + "description": "This downstairs is undergoing LiveRepair", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "wait_quorum" + "live_repair" ] } }, @@ -299,40 +327,83 @@ ] }, { + "description": "The IO task for the client is being stopped", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "reconcile" + "stopping" ] + }, + "value": { + "$ref": "#/components/schemas/ClientStopReason" } }, "required": [ - "type" + "type", + "value" ] + } + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + 
"message": { + "type": "string" }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + }, + "NegotiationState": { + "description": "Tracks client negotiation progress", + "oneOf": [ { + "description": "Initial state", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "active" + "start" + ] + }, + "value": { + "type": "object", + "properties": { + "auto_promote": { + "type": "boolean" + } + }, + "required": [ + "auto_promote" ] } }, "required": [ - "type" + "type", + "value" ] }, { + "description": "Waiting for activation by the guest", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "faulted" + "wait_active" ] } }, @@ -341,12 +412,13 @@ ] }, { + "description": "Waiting for the minimum number of downstairs to be present.", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "live_repair_ready" + "wait_quorum" ] } }, @@ -360,7 +432,7 @@ "type": { "type": "string", "enum": [ - "live_repair" + "wait_for_promote" ] } }, @@ -374,7 +446,7 @@ "type": { "type": "string", "enum": [ - "offline" + "wait_for_region_info" ] } }, @@ -388,7 +460,7 @@ "type": { "type": "string", "enum": [ - "replaced" + "get_last_flush" ] } }, @@ -397,43 +469,49 @@ ] }, { - "description": "The IO task for the client is being stopped", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "stopping" + "get_extent_versions" ] - }, - "value": { - "$ref": "#/components/schemas/ClientStopReason" } }, "required": [ - "type", - "value" + "type" ] - } - ] - }, - "Error": { - "description": "Error information from a response.", - "type": "object", - "properties": { - "error_code": { - "type": "string" }, - "message": { - "type": "string" + { + "description": "Initial startup, downstairs are repairing from each other.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "reconcile" + ] + } + }, + "required": [ + "type" + ] }, - "request_id": 
{ - "type": "string" + { + "description": "Waiting for live-repair to begin", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "live_repair_ready" + ] + } + }, + "required": [ + "type" + ] } - }, - "required": [ - "message", - "request_id" ] }, "UpState": { From 44ce475ed1ddbf43ae14809c821c608286b4bdf7 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 25 Nov 2024 14:27:18 -0500 Subject: [PATCH 05/21] Fix cmon --- cmon/src/main.rs | 52 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/cmon/src/main.rs b/cmon/src/main.rs index bc1e2371e..21627b2b5 100644 --- a/cmon/src/main.rs +++ b/cmon/src/main.rs @@ -8,7 +8,9 @@ use strum::IntoEnumIterator; use strum_macros::EnumIter; use tokio::time::{sleep, Duration}; -use crucible::{Arg, ClientStopReason, DsState}; +use crucible::{ + Arg, ClientStopReason, ConnectionMode, DsState, NegotiationState, +}; /// Connect to crucible control server #[derive(Parser, Debug)] @@ -87,24 +89,46 @@ enum Action { // Translate a DsState into a three letter string for printing. fn short_state(dss: DsState) -> String { match dss { - DsState::New - | DsState::Stopping(ClientStopReason::NegotiationFailed(..)) => { - "NEW".to_string() - } - DsState::WaitActive => "WAC".to_string(), - DsState::WaitQuorum => "WAQ".to_string(), - DsState::Reconcile => "REC".to_string(), + DsState::Connecting { + state: NegotiationState::WaitActive, + .. + } => "WAC".to_string(), + + DsState::Connecting { + state: NegotiationState::WaitQuorum, + .. + } => "WAQ".to_string(), + DsState::Connecting { + state: NegotiationState::Reconcile, + .. + } => "REC".to_string(), DsState::Active => "ACT".to_string(), - DsState::Faulted | DsState::Stopping(ClientStopReason::Fault(..)) => { - "FLT".to_string() + DsState::Connecting { + state: NegotiationState::LiveRepairReady, + .. 
+ } => "LRR".to_string(), + DsState::Stopping(ClientStopReason::NegotiationFailed(..)) + | DsState::Connecting { + mode: ConnectionMode::New, + .. + } => "NEW".to_string(), + DsState::Connecting { + mode: ConnectionMode::Faulted, + .. } - DsState::LiveRepairReady => "LRR".to_string(), + | DsState::Stopping(ClientStopReason::Fault(..)) => "FLT".to_string(), DsState::LiveRepair => "LR".to_string(), - DsState::Offline => "OFF".to_string(), + DsState::Connecting { + mode: ConnectionMode::Offline, + .. + } => "OFF".to_string(), DsState::Stopping(ClientStopReason::Deactivated) => "DAV".to_string(), DsState::Stopping(ClientStopReason::Disabled) => "DIS".to_string(), - DsState::Stopping(ClientStopReason::Replacing) => "RPC".to_string(), - DsState::Replaced => "RPD".to_string(), + DsState::Stopping(ClientStopReason::Replacing) + | DsState::Connecting { + mode: ConnectionMode::Replaced, + .. + } => "RPD".to_string(), } } From 280c357087929ff11b2e09e3561e588827a5a726 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 25 Nov 2024 15:18:14 -0500 Subject: [PATCH 06/21] All tests passing! 
--- openapi/crucible-control.json | 2 +- upstairs/src/client.rs | 20 ++++++++++++++++++-- upstairs/src/downstairs.rs | 10 +++++++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/openapi/crucible-control.json b/openapi/crucible-control.json index 0854304bb..8fc81b2a3 100644 --- a/openapi/crucible-control.json +++ b/openapi/crucible-control.json @@ -370,7 +370,7 @@ "description": "Tracks client negotiation progress", "oneOf": [ { - "description": "Initial state", + "description": "Initial state, waiting to hear `YesItsMe` from the client\n\nOnce this message is heard, transitions to either `WaitActive` (if `auto_promote` is `false`) or `WaitQuorum` (if `auto_promote` is `true`)", "type": "object", "properties": { "type": { diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index adaa98963..7c3b2ba62 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -1160,7 +1160,6 @@ impl DownstairsClient { /// error is at or after `Message::YouAreNowActive`. /// /// Returns a flag indicating how to proceed - #[must_use] pub(crate) fn continue_negotiation( &mut self, m: Message, @@ -1629,6 +1628,20 @@ impl DownstairsClient { *state = NegotiationState::WaitQuorum; out = NegotiationResult::WaitQuorum; } + // Special case: if a downstairs is replaced while we're + // still trying to go active, then we use the WaitQuorum + // path instead of LiveRepair. + ConnectionMode::Replaced + if matches!( + up_state, + UpstairsState::Initializing + | UpstairsState::GoActive(..) 
+                        ) =>
+                    {
+                        *state = NegotiationState::WaitQuorum;
+                        out = NegotiationResult::WaitQuorum;
+                    }
+
                     ConnectionMode::Faulted | ConnectionMode::Replaced => {
                         *state = NegotiationState::LiveRepairReady;
                         out = NegotiationResult::LiveRepair;
@@ -1796,7 +1809,10 @@
 #[serde(rename_all = "snake_case")]
 #[serde(tag = "type", content = "value")]
 pub enum NegotiationState {
-    /// Initial state
+    /// Initial state, waiting to hear `YesItsMe` from the client
+    ///
+    /// Once this message is heard, transitions to either `WaitActive` (if
+    /// `auto_promote` is `false`) or `WaitForPromote` (if `auto_promote` is `true`)
     Start {
         auto_promote: bool,
     },
diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs
index d9e3f63b7..2cb45892a 100644
--- a/upstairs/src/downstairs.rs
+++ b/upstairs/src/downstairs.rs
@@ -2602,6 +2602,14 @@ impl Downstairs {
                 continue;
             }
             match self.clients[client_id].state() {
+                // Replacement is allowed before starting, but not between
+                // activation and coming online
+                DsState::Connecting {
+                    state:
+                        NegotiationState::Start { .. }
+                        | NegotiationState::WaitActive,
+                    mode: ConnectionMode::New,
+                } => {}
                 DsState::Stopping(..)
                 | DsState::Connecting { .. }
                 | DsState::LiveRepair => {
                     return Err(CrucibleError::ReplaceRequestInvalid(format!(
                         "Replace {old} failed, downstairs {client_id} is {:?}",
                         self.clients[client_id].state(),
                     )));
                 }
-                _ => {}
+                DsState::Active => {}
             }
         }

From 2390a68d71e2673441e8a02c73938f5a9ab10cb0 Mon Sep 17 00:00:00 2001
From: Matt Keeter
Date: Mon, 2 Dec 2024 10:09:11 -0500
Subject: [PATCH 07/21] Allow reconciliation after replacement

---
 upstairs/src/client.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs
index 7c3b2ba62..9f6a9d806 100644
--- a/upstairs/src/client.rs
+++ b/upstairs/src/client.rs
@@ -438,7 +438,10 @@ impl DownstairsClient {
             panic!("invalid state {:?}", self.state);
         };
         assert_eq!(*state, NegotiationState::WaitQuorum);
-        assert_eq!(*mode, ConnectionMode::New); // XXX Replaced?
+ assert!(matches!( + mode, + ConnectionMode::New | ConnectionMode::Replaced + )); *state = NegotiationState::Reconcile; } @@ -1687,7 +1690,7 @@ impl DownstairsClient { self.state, DsState::Connecting { state: NegotiationState::Reconcile, - mode: ConnectionMode::New + mode: ConnectionMode::New | ConnectionMode::Replaced } ) { panic!( From 75d5c157b4a80bf7bf9e6b20a5252c6617e77adb Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 2 Dec 2024 10:33:19 -0500 Subject: [PATCH 08/21] Fix valid replacement states for other DS --- upstairs/src/downstairs.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index 2cb45892a..ab3a3aaf7 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -2602,23 +2602,25 @@ impl Downstairs { continue; } match self.clients[client_id].state() { - // Replacement is allowed before starting, but not between - // activation and coming online - DsState::Connecting { - state: - NegotiationState::Start { .. } - | NegotiationState::WaitActive, - mode: ConnectionMode::New, - } => {} DsState::Stopping(..) - | DsState::Connecting { .. } - | DsState::LiveRepair => { + | DsState::LiveRepair + | DsState::Connecting { + mode: + ConnectionMode::Replaced + | ConnectionMode::Offline + | ConnectionMode::Faulted, + .. + } => { return Err(CrucibleError::ReplaceRequestInvalid(format!( "Replace {old} failed, downstairs {client_id} is {:?}", self.clients[client_id].state(), ))); } - DsState::Active => {} + DsState::Active + | DsState::Connecting { + mode: ConnectionMode::New, + .. 
+            } => {}
         }
     }

From 26be47d61c81c77820b9ece0b6acc0d902964b6e Mon Sep 17 00:00:00 2001
From: Matt Keeter
Date: Mon, 2 Dec 2024 10:51:02 -0500
Subject: [PATCH 09/21] Remove duplicate ClientStopReason in ClientRunResult

---
 upstairs/src/client.rs   | 101 ++++++++++++------------------------
 upstairs/src/upstairs.rs |  12 ++---
 2 files changed, 39 insertions(+), 74 deletions(-)

diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs
index 9f6a9d806..6b2c465d7 100644
--- a/upstairs/src/client.rs
+++ b/upstairs/src/client.rs
@@ -26,7 +26,10 @@ use serde::{Deserialize, Serialize};
 use slog::{debug, error, info, o, warn, Logger};
 use tokio::{
     net::{TcpSocket, TcpStream},
-    sync::{mpsc, oneshot},
+    sync::{
+        mpsc,
+        oneshot::{self, error::RecvError},
+    },
     time::{sleep, sleep_until, Duration, Instant},
 };
 use tokio_util::codec::FramedRead;
@@ -2048,8 +2051,9 @@ pub(crate) enum ClientRunResult {
     #[allow(dead_code)]
     ReadFailed(anyhow::Error),
     /// The `DownstairsClient` requested that the task stop, so it did
-    #[allow(dead_code)]
-    RequestedStop(ClientStopReason),
+    ///
+    /// The reason for the stop will be in the `DsState::Stopping(..)` member
+    RequestedStop,
     /// The socket closed cleanly and the task exited
     Finished,
     /// One of the queues used to communicate with the main task closed
@@ -2065,6 +2069,16 @@ pub(crate) enum ClientRunResult {
     ReceiveTaskCancelled,
 }

+/// Convert a oneshot result into a `ClientRunResult`
+impl From<Result<ClientStopReason, RecvError>> for ClientRunResult {
+    fn from(value: Result<ClientStopReason, RecvError>) -> Self {
+        match value {
+            Ok(..) => ClientRunResult::RequestedStop,
+            Err(..) => ClientRunResult::QueueClosed,
+        }
+    }
+}
+
 /// Data structure to hold context for the client IO task
 ///
 /// Client IO is managed by two tasks:
@@ -2182,18 +2196,11 @@ impl ClientIoTask {
         if self.delay {
             tokio::select!
{ s = &mut self.stop => { - warn!(self.log, "client IO task stopped during sleep"); - return match s { - Ok(s) => - ClientRunResult::RequestedStop(s), - Err(e) => { - warn!( - self.log, - "client_stop_rx closed unexpectedly: {e:?}" - ); - ClientRunResult::QueueClosed - } - } + warn!( + self.log, + "client IO task stopped during sleep: {s:?}" + ); + return s.into(); } _ = tokio::time::sleep(CLIENT_RECONNECT_DELAY) => { // this is fine @@ -2214,18 +2221,11 @@ impl ClientIoTask { // Otherwise, continue as usual } s = &mut self.stop => { - warn!(self.log, "client IO task stopped before connecting"); - return match s { - Ok(s) => - ClientRunResult::RequestedStop(s), - Err(e) => { - warn!( - self.log, - "client_stop_rx closed unexpectedly: {e:?}" - ); - ClientRunResult::QueueClosed - } - } + warn!( + self.log, + "client IO task stopped before connecting: {s:?}" + ); + return s.into(); } } @@ -2264,18 +2264,11 @@ impl ClientIoTask { } } s = &mut self.stop => { - warn!(self.log, "client IO task stopped during connection"); - return match s { - Ok(s) => - ClientRunResult::RequestedStop(s), - Err(e) => { - warn!( - self.log, - "client_stop_rx closed unexpectedly: {e:?}" - ); - ClientRunResult::QueueClosed - } - } + warn!( + self.log, + "client IO task stopped during connection: {s:?}" + ); + return s.into(); } }; @@ -2370,19 +2363,8 @@ impl ClientIoTask { } s = &mut self.stop => { - match s { - Ok(s) => { - break ClientRunResult::RequestedStop(s); - } - - Err(e) => { - warn!( - self.log, - "client_stop_rx closed unexpectedly: {e:?}" - ); - break ClientRunResult::QueueClosed; - } - } + info!(self.log, "client stopping due to {s:?}"); + break s.into(); } } } @@ -2440,19 +2422,8 @@ impl ClientIoTask { } } s = &mut self.stop => { - match s { - Ok(s) => { - Err(ClientRunResult::RequestedStop(s)) - } - - Err(e) => { - warn!( - self.log, - "client_stop_rx closed unexpectedly: {e:?}" - ); - Err(ClientRunResult::QueueClosed) - } - } + info!(self.log, "client stopped in write due to 
{s:?}"); + Err(s.into()) } join_result = self.recv_task.join() => { Err(join_result) diff --git a/upstairs/src/upstairs.rs b/upstairs/src/upstairs.rs index c8893b1d8..de5cd9cff 100644 --- a/upstairs/src/upstairs.rs +++ b/upstairs/src/upstairs.rs @@ -2172,9 +2172,7 @@ pub(crate) mod test { // Restart the IO task (because we'll be faking messages from it) up.apply(UpstairsAction::Downstairs(DownstairsAction::Client { client_id: to_repair, - action: ClientAction::TaskStopped(ClientRunResult::RequestedStop( - ClientStopReason::Fault(ClientFaultReason::RequestedFault), - )), + action: ClientAction::TaskStopped(ClientRunResult::RequestedStop), })); let mode = ConnectionMode::Faulted; for state in [ @@ -2285,9 +2283,7 @@ pub(crate) mod test { up.apply(UpstairsAction::Downstairs(DownstairsAction::Client { client_id, action: ClientAction::TaskStopped( - ClientRunResult::RequestedStop( - ClientStopReason::Deactivated, - ), + ClientRunResult::RequestedStop, ), })); @@ -3684,9 +3680,7 @@ pub(crate) mod test { up.apply(UpstairsAction::Downstairs(DownstairsAction::Client { client_id, action: ClientAction::TaskStopped( - ClientRunResult::RequestedStop( - ClientStopReason::Deactivated, - ), + ClientRunResult::RequestedStop, ), })); } From ef7bb0cbdbea2dfa24f25f72282af8ecc72d2d17 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Mon, 2 Dec 2024 14:12:11 -0500 Subject: [PATCH 10/21] Correctly skip jobs on reinitialize --- upstairs/src/downstairs.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index ab3a3aaf7..3ad75b2c8 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -645,6 +645,16 @@ impl Downstairs { client_id: ClientId, up_state: &UpstairsState, ) { + // Check whether we asked the IO task to stop ourselves + let stopped_due_to_fault = matches!( + self.clients[client_id].state(), + DsState::Stopping(ClientStopReason::Fault(..)) + ); + + // Restart 
the IO task for that specific client, transitioning to a new + // state. + self.clients[client_id].reinitialize(up_state, self.can_replay); + // If the IO task stops on its own, then under certain circumstances, // we want to skip all of its jobs. (If we requested that the IO task // stop, then whoever made that request is responsible for skipping jobs @@ -653,21 +663,17 @@ impl Downstairs { // Specifically, we want to skip jobs if the only path back online for // that client goes through live-repair; if that client can come back // through replay, then the jobs must remain live. - let client_state = self.clients[client_id].state(); - if matches!( - client_state, - DsState::LiveRepair | DsState::LiveRepairReady - ) || matches!( - client_state, - DsState::Active | DsState::Offline if !self.can_replay - ) { + let now_faulted = matches!( + self.clients[client_id].state(), + DsState::Connecting { + mode: ConnectionMode::Faulted, + .. + } + ); + if now_faulted && !stopped_due_to_fault { self.skip_all_jobs(client_id); } - // Restart the IO task for that specific client, transitioning to a new - // state. - self.clients[client_id].reinitialize(up_state, self.can_replay); - for i in ClientId::iter() { // Clear per-client delay, because we're starting a new session self.clients[i].set_delay_us(0); From 092a86fa6e127826d302f1689ccacff6e72e07b4 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 09:52:34 -0500 Subject: [PATCH 11/21] Cleaning up reinitialize --- upstairs/src/client.rs | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index 6b2c465d7..19bca9379 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -541,43 +541,50 @@ impl DownstairsClient { let current = &self.state; let new_mode = match current { + // If we can't replay jobs, then reconnection must happen through + // live-repair (rather than replay). 
DsState::Active | DsState::Connecting { mode: ConnectionMode::Offline, .. - } if !can_replay => Some(ConnectionMode::Faulted), + } if !can_replay => ConnectionMode::Faulted, + + // Other failures during connection preserve the previous mode + DsState::Connecting { mode, .. } => *mode, + + // Faults or failures during live-repair must go through live-repair DsState::LiveRepair | DsState::Stopping(ClientStopReason::Fault(..)) => { - Some(ConnectionMode::Faulted) + ConnectionMode::Faulted } - DsState::Active => Some(ConnectionMode::Offline), + // If the Downstairs has spontaneously stopped, we will attempt to + // replay jobs when reconnecting + DsState::Active => ConnectionMode::Offline, + // Failures during negotiation or deactivation have to start from + // the very beginning. DsState::Stopping(ClientStopReason::NegotiationFailed(..)) | DsState::Stopping(ClientStopReason::Disabled) | DsState::Stopping(ClientStopReason::Deactivated) => { - Some(ConnectionMode::New) + ConnectionMode::New } - // If we have replaced a downstairs, don't forget that. + // Deliberate Downstairs replacement connects using live-repair, + // with a flag that allows for the address to change DsState::Stopping(ClientStopReason::Replacing) => { - Some(ConnectionMode::Replaced) + ConnectionMode::Replaced } - - // We stay in these states through the task restart - DsState::Connecting { .. } => None, }; - let new_state = new_mode.map(|mode| DsState::Connecting { - mode, + let new_state = DsState::Connecting { + mode: new_mode, state: NegotiationState::Start { auto_promote }, - }); + }; - // Jobs are skipped and replayed in `Downstairs::reinitialize`, which is - // (probably) the caller of this function. - if let Some(new_state) = new_state { - self.checked_state_transition(up_state, new_state); - } + // Note that jobs are skipped / replayed in `Downstairs::reinitialize`, + // which is (probably) the caller of this function! 
+ self.checked_state_transition(up_state, new_state); self.connection_id.update(); // Restart with a short delay, connecting if we're auto-promoting From fc334f31f525f9f82b23a87974bbe18c4e1e0743 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 10:25:54 -0500 Subject: [PATCH 12/21] Update comments --- upstairs/src/client.rs | 58 ++++++++++++++++++++++--- upstairs/src/lib.rs | 97 +++++++++++------------------------------- 2 files changed, 76 insertions(+), 79 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index 19bca9379..c0c8faf50 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -1816,6 +1816,47 @@ impl DownstairsClient { } /// Tracks client negotiation progress +/// +/// The exact path through negotiation depends on the [`ConnectionMode`]. +/// +/// There are three main paths, shown below: +/// +/// ```text +/// ┌───────┐ +/// │ Start ├────────┐ +/// └───┬───┘ │ +/// │ │ +/// ┌─────▼──────┐ │ +/// │ WaitActive │ │ auto-promote +/// └─────┬──────┘ │ +/// │ │ +/// ┌───────▼────────┐ │ +/// │ WaitForPromote ◄───┘ +/// └───────┬────────┘ +/// │ +/// ┌────────▼──────────┐ +/// │ WaitForRegionInfo │ +/// └──┬──────────────┬─┘ +/// Offline │ │ New / Faulted / Replaced +/// ┌──────▼─────┐ ┌────▼────────────┐ +/// │GetLastFlush│ │GetExtentVersions│ +/// └──────┬─────┘ └─┬─────────────┬─┘ +/// │ │ New │ Faulted / Replaced +/// │ ┌──────▼───┐ ┌────▼──────────┐ +/// │ │WaitQuorum│ │LiveRepairReady│ +/// │ └────┬─────┘ └────┬──────────┘ +/// │ │ │ +/// │ ┌────▼────┐ │ +/// │ │Reconcile│ │ +/// │ └────┬────┘ │ +/// │ │ │ +/// │ ┌───▼──┐ │ +/// └─────► Done ◄────────────┘ +/// └──────┘ +/// ``` +/// +/// `Done` isn't actually present in the state machine; it's indicated by +/// returning a [`NegotiationResult`] other than [`NegotiationResult::NotDone`]. 
#[derive( Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema, )] @@ -1826,21 +1867,26 @@ pub enum NegotiationState { /// /// Once this message is heard, transitions to either `WaitActive` (if /// `auto_promote` is `false`) or `WaitQuorum` (if `auto_promote` is `true`) - Start { - auto_promote: bool, - }, + Start { auto_promote: bool }, /// Waiting for activation by the guest WaitActive, - /// Waiting for the minimum number of downstairs to be present. - WaitQuorum, - + /// Waiting to hear `YouAreNowActive` from the client WaitForPromote, + + /// Waiting to hear `RegionInfo` from the client WaitForRegionInfo, + + /// Waiting to hear `LastFlushAck` from the client GetLastFlush, + + /// Waiting to hear `ExtentVersions` from the client GetExtentVersions, + /// Waiting for the minimum number of downstairs to be present. + WaitQuorum, + /// Initial startup, downstairs are repairing from each other. Reconcile, diff --git a/upstairs/src/lib.rs b/upstairs/src/lib.rs index b409cc140..dc215e886 100644 --- a/upstairs/src/lib.rs +++ b/upstairs/src/lib.rs @@ -714,78 +714,24 @@ pub(crate) struct RawReadResponse { pub data: bytes::BytesMut, } -/* - * States of a downstairs - * - * This shows the different states a downstairs can be in from the point of - * view of the upstairs. - * - * Double line paths can only be taken if an upstairs is active and goes to - * deactivated. - * - * │ - * ┌──┐ ▼ - * bad│ │ │ - * version│ ┌▼───┴──────┐ - * └─┤ ╞═════◄══════════════════╗ - * ┌─────────────► New ╞═════◄════════════════╗ ║ - * │ ┌─────► ├─────◄──────┐ ║ ║ - * │ │ └────┬───┬──┘ │ ║ ║ - * │ │ ▼ └───►───┐ other │ ║ ║ - * │ bad│ ┌────┴──────┐ │ failures ║ ║ - * │ region│ │ Wait │ │ ▲ ║ ║ - * │ │ │ Active ├─►┐ │ │ ║ ║ - * │ │ └────┬──────┘ │ │ │ ║ ║ - * │ │ ┌────┴──────┐ │ └───────┤ ║ ║ - * │ │ │ Wait │ └─────────┤ ║ ║ - * │ └─────┤ Quorum ├──►─────────┤ ║ ║ - * │ └────┬──────┘ │ ║ ║ - * │ ........▼.......... 
│ ║ ║ - * │failed : ┌────┴──────┐ : │ ║ ║ - * │reconcile : │ Reconcile │ : │ ╔═╝ ║ - * └─────────────┤ ├──►─────────┘ ║ ║ - * : └────┬──────┘ : ║ ║ - * Not Active : │ : ▲ ▲ Not Active - * .............. . . . │. . . . ...................║...║............ - * Active ▼ ║ ║ Active - * ┌────┴──────┐ ┌──────────╨┐ ║ - * ┌─►─┤ Active ├─────►───┤Deactivated│ ║ - * │ │ │ ┌──────┤ ├─◄──────┐ - * │ └─┬───┬───┬─┘ │ └───────────┘ ║ │ - * │ ▼ ▼ ▲ ▲ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ ▼ ▲ ▲ ║ │ - * │ │ │ │ │ ▲ │ - * │ │ ┌─┴───┴────┴┐ ┌────────────╨──┐ │ - * │ │ │ Offline │ │ Faulted │ │ - * │ │ │ ├─────►─┤ │ │ - * │ │ └───────────┘ └─┬─┬───────┬─┬─┘ │ - * │ │ ▲ ▲ ▼ ▲ ▲ - * │ └───────────►───────────┘ │ │ │ │ - * │ │ │ │ │ - * │ ┌────────┴─┐ ┌─┴─┴────┴─┐ - * └──────────────────────┤ Live ├─◄─┤ Live │ - * │ Repair │ │ Repair │ - * │ │ │ Ready │ - * └──────────┘ └──────────┘ - * - * - * The downstairs state can go to Disabled from any other state, as that - * transition happens when a message is received from the actual - * downstairs on the other side of the connection.. - * The only path back at that point is for the Upstairs (who will self - * deactivate when it detects this) is to go back to New and through - * the reconcile process. - * ┌───────────┐ - * │ Disabled │ - * └───────────┘ - */ +/// High-level states for a Downstairs +/// +/// The state machine for a Downstairs is relatively simple: +/// +/// ```text +/// ┌────────────┐ +/// ┌────► LiveRepair ├─────┐ +/// ┌─────────┴┐ └─────┬──────┘ ┌─▼──────┐ +/// │Connecting│ │ │Stopping│ +/// └─▲───────┬┘ ┌─────▼──────┐ └─▲────┬─┘ +/// │ └────► Active ├─────┘ │ +/// │ └─────┬──────┘ │ +/// │ │ │ +/// └─────────────────◄┴─────────────────┘ +/// ``` +/// +/// Complexity is hidden in the `Connecting` state, which wraps a +/// [`NegotiationState`] implementing the negotiation state machine. 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] @@ -806,6 +752,7 @@ pub enum DsState { /// The IO task for the client is being stopped Stopping(ClientStopReason), } + impl std::fmt::Display for DsState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -873,9 +820,13 @@ impl std::fmt::Display for DsState { #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum ConnectionMode { - Offline, + /// Connect through reconciliation once a quorum has come online New, + /// Replay cached jobs when reconnecting + Offline, + /// Reconnect through live-repair Faulted, + /// Reconnect through live-repair; the address is allowed to change Replaced, } From 1b633d689ecbf56211c6f4bbebb37e4768ee775e Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 11:05:26 -0500 Subject: [PATCH 13/21] Cleaner ConnectionMode handling during replacement --- upstairs/src/client.rs | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index c0c8faf50..c57e8753e 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -441,10 +441,7 @@ impl DownstairsClient { panic!("invalid state {:?}", self.state); }; assert_eq!(*state, NegotiationState::WaitQuorum); - assert!(matches!( - mode, - ConnectionMode::New | ConnectionMode::Replaced - )); + assert!(matches!(mode, ConnectionMode::New)); *state = NegotiationState::Reconcile; } @@ -570,11 +567,17 @@ impl DownstairsClient { ConnectionMode::New } - // Deliberate Downstairs replacement connects using live-repair, - // with a flag that allows for the address to change - DsState::Stopping(ClientStopReason::Replacing) => { - ConnectionMode::Replaced - } + DsState::Stopping(ClientStopReason::Replacing) => match up_state { + // If we haven't activated yet, 
then start from New + UpstairsState::GoActive(..) | UpstairsState::Initializing => { + ConnectionMode::New + } + // Otherwise, use live-repair; `ConnectionMode::Replaced` + // indicates that the address is allowed to change. + UpstairsState::Active | UpstairsState::Deactivating { .. } => { + ConnectionMode::Replaced + } + }, }; let new_state = DsState::Connecting { mode: new_mode, @@ -755,7 +758,7 @@ impl DownstairsClient { match &mut self.state { DsState::Connecting { state: NegotiationState::Start { auto_promote }, - mode: ConnectionMode::New | ConnectionMode::Replaced, + mode: ConnectionMode::New, } => { if *auto_promote { panic!("called set_active_request while already waiting") @@ -769,7 +772,7 @@ impl DownstairsClient { } DsState::Connecting { state: NegotiationState::WaitActive, - mode: ConnectionMode::New | ConnectionMode::Replaced, + mode: ConnectionMode::New, } => { info!( self.log, From 9a1ff37d9cfb93708444d12d7c87eaa822aa005b Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 11:47:38 -0500 Subject: [PATCH 14/21] Update OpenAPI docs --- openapi/crucible-control.json | 54 ++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/openapi/crucible-control.json b/openapi/crucible-control.json index 8fc81b2a3..c0ea3593d 100644 --- a/openapi/crucible-control.json +++ b/openapi/crucible-control.json @@ -233,12 +233,35 @@ ] }, "ConnectionMode": { - "type": "string", - "enum": [ - "offline", - "new", - "faulted", - "replaced" + "oneOf": [ + { + "description": "Connect through reconciliation once a quorum has come online", + "type": "string", + "enum": [ + "new" + ] + }, + { + "description": "Replay cached jobs when reconnecting", + "type": "string", + "enum": [ + "offline" + ] + }, + { + "description": "Reconnect through live-repair", + "type": "string", + "enum": [ + "faulted" + ] + }, + { + "description": "Reconnect through live-repair; the address is allowed to change", + "type": "string", + "enum": [ + 
"replaced" + ] + } ] }, "DownstairsWork": { @@ -264,6 +287,7 @@ ] }, "DsState": { + "description": "High-level states for a Downstairs\n\nThe state machine for a Downstairs is relatively simple:\n\n```text ┌────────────┐ ┌────► LiveRepair ├─────┐ ┌─────────┴┐ └─────┬──────┘ ┌─▼──────┐ │Connecting│ │ │Stopping│ └─▲───────┬┘ ┌─────▼──────┐ └─▲────┬─┘ │ └────► Active ├─────┘ │ │ └─────┬──────┘ │ │ │ │ └─────────────────◄┴─────────────────┘ ```\n\nComplexity is hidden in the `Connecting` state, which wraps a [`NegotiationState`] implementing the negotiation state machine.", "oneOf": [ { "description": "New connection", @@ -367,7 +391,7 @@ ] }, "NegotiationState": { - "description": "Tracks client negotiation progress", + "description": "Tracks client negotiation progress\n\nThe exact path through negotiation depends on the [`ConnectionMode`].\n\nThere are three main paths, shown below:\n\n```text ┌───────┐ │ Start ├────────┐ └───┬───┘ │ │ │ ┌─────▼──────┐ │ │ WaitActive │ │ auto-promote └─────┬──────┘ │ │ │ ┌───────▼────────┐ │ │ WaitForPromote ◄───┘ └───────┬────────┘ │ ┌────────▼──────────┐ │ WaitForRegionInfo │ └──┬──────────────┬─┘ Offline │ │ New / Faulted / Replaced ┌──────▼─────┐ ┌────▼────────────┐ │GetLastFlush│ │GetExtentVersions│ └──────┬─────┘ └─┬─────────────┬─┘ │ │ New │ Faulted / Replaced │ ┌──────▼───┐ ┌────▼──────────┐ │ │WaitQuorum│ │LiveRepairReady│ │ └────┬─────┘ └────┬──────────┘ │ │ │ │ ┌────▼────┐ │ │ │Reconcile│ │ │ └────┬────┘ │ │ │ │ │ ┌───▼──┐ │ └─────► Done ◄────────────┘ └──────┘ ```\n\n`Done` isn't actually present in the state machine; it's indicated by returning a [`NegotiationResult`] other than [`NegotiationResult::NotDone`].", "oneOf": [ { "description": "Initial state, waiting to hear `YesItsMe` from the client\n\nOnce this message is heard, transitions to either `WaitActive` (if `auto_promote` is `false`) or `WaitQuorum` (if `auto_promote` is `true`)", @@ -412,13 +436,13 @@ ] }, { - "description": "Waiting for the minimum number of 
downstairs to be present.", + "description": "Waiting to hear `YouAreNowActive` from the client", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "wait_quorum" + "wait_for_promote" ] } }, @@ -427,12 +451,13 @@ ] }, { + "description": "Waiting to hear `RegionInfo` from the client", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "wait_for_promote" + "wait_for_region_info" ] } }, @@ -441,12 +466,13 @@ ] }, { + "description": "Waiting to hear `LastFlushAck` from the client", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "wait_for_region_info" + "get_last_flush" ] } }, @@ -455,12 +481,13 @@ ] }, { + "description": "Waiting to hear `ExtentVersions` from the client", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "get_last_flush" + "get_extent_versions" ] } }, @@ -469,12 +496,13 @@ ] }, { + "description": "Waiting for the minimum number of downstairs to be present.", "type": "object", "properties": { "type": { "type": "string", "enum": [ - "get_extent_versions" + "wait_quorum" ] } }, From b27943b7a21652206bbc96dd9e747199b93abe26 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 11:55:28 -0500 Subject: [PATCH 15/21] Fix notify-nexus code --- upstairs/src/downstairs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index 3ad75b2c8..08be2f01b 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -4256,7 +4256,7 @@ impl Downstairs { ClientRunResult::ReadFailed(_) => { DownstairsClientStoppedReason::ReadFailed } - ClientRunResult::RequestedStop(_) => { + ClientRunResult::RequestedStop => { // skip this notification, it fires for *every* Upstairs // deactivation //DownstairsClientStoppedReason::RequestedStop From 3ad1361078402708f8ec83a15e4d4e613bc2baff Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Wed, 11 Dec 2024 10:22:15 -0500 Subject: [PATCH 16/21] Don't 
autopromote when a client is disabled --- upstairs/src/client.rs | 5 ++++- upstairs/src/downstairs.rs | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index c57e8753e..b17134436 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -531,7 +531,10 @@ impl DownstairsClient { // downstairs should automatically call PromoteToActive when it reaches // the relevant state. let auto_promote = match up_state { - UpstairsState::Active | UpstairsState::GoActive(..) => true, + UpstairsState::Active | UpstairsState::GoActive(..) => !matches!( + self.state, + DsState::Stopping(ClientStopReason::Disabled) + ), UpstairsState::Initializing | UpstairsState::Deactivating { .. } => false, }; diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index 08be2f01b..d81ddab52 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -3394,6 +3394,7 @@ impl Downstairs { "Saw CrucibleError::UpstairsInactive on client {}!", client_id ); + // XXX should we also change the upstairs state here? self.clients[client_id].disable(up_state); } Some(CrucibleError::DecryptionError) => { From 958627ddb85f440f159d21a71576db21206105cd Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Wed, 11 Dec 2024 10:22:15 -0500 Subject: [PATCH 17/21] Concerns! --- upstairs/src/client.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index b17134436..deed9ca27 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -567,6 +567,8 @@ impl DownstairsClient { DsState::Stopping(ClientStopReason::NegotiationFailed(..)) | DsState::Stopping(ClientStopReason::Disabled) | DsState::Stopping(ClientStopReason::Deactivated) => { + // XXX NegotiationFailed could also be hit during reconnection, + // in which case we shouldn't use `ConnectionMode::New` (?) 
ConnectionMode::New } From a1b9eee047892c6535c871f73d4eddff2f6b43da Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Thu, 12 Dec 2024 12:13:28 -0500 Subject: [PATCH 18/21] Reimplement checked_state_transition --- upstairs/src/client.rs | 165 +++++++++++++++++++++++++++++++++++-- upstairs/src/downstairs.rs | 54 ++++++------ upstairs/src/upstairs.rs | 1 - 3 files changed, 187 insertions(+), 33 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index deed9ca27..781382b12 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -939,12 +939,6 @@ impl DownstairsClient { /// will panic if there is not a valid state transition edge between the /// current `self.state` and the requested `new_state`. /// - /// For example, transitioning to a `new_state` of [DsState::Replacing] is - /// *always* possible, so this will never panic for that state transition. - /// On the other hand, [DsState::Replaced] can *only* follow - /// [DsState::Replacing], so if the current state is *anything else*, that - /// indicates a logic error happened in some other part of the code. - /// /// If the state transition is valid, this function simply sets `self.state` /// to the newly requested state. 
There's no magic here beyond that; this /// function does not change anything about the state or any other internal @@ -954,13 +948,131 @@ impl DownstairsClient { /// If the transition is not valid pub(crate) fn checked_state_transition( &mut self, - _up_state: &UpstairsState, + up_state: &UpstairsState, new_state: DsState, ) { - // TODO reimplement all of the checks + if !Self::is_state_transition_valid(up_state, self.state, new_state) { + panic!( + "invalid state transition from {:?} -> {:?} \ + (with up_state: {:?}", + self.state, new_state, up_state + ); + } self.state = new_state; } + /// Check if a state transition is valid, returning `true` or `false` + fn is_state_transition_valid( + up_state: &UpstairsState, + prev_state: DsState, + next_state: DsState, + ) -> bool { + use ConnectionMode as C; + use DsState as D; + use NegotiationState as N; + use UpstairsState as U; + match (prev_state, next_state) { + ( + D::Connecting { + state: prev_state, + mode: prev_mode, + }, + D::Connecting { + state: next_state, + mode: next_mode, + }, + ) => { + if next_mode == C::New && matches!(up_state, U::Active) { + return false; + } + next_mode == prev_mode + && NegotiationState::is_transition_valid( + prev_mode, prev_state, next_state, + ) + } + (D::Connecting { state, mode }, D::Active) => { + // We can go to Active either through reconciliation or replay; + // in other cases, we must use live-repair + matches!( + (state, mode), + (N::GetLastFlush, C::Offline) + | (N::Reconcile, C::New) + | (N::LiveRepairReady, C::Faulted | C::Replaced) + ) + } + (D::Connecting { state, mode }, D::LiveRepair) => { + matches!( + (state, mode), + (N::LiveRepairReady, C::Faulted | C::Replaced) + ) + } + (D::LiveRepair, D::Active) => true, + // When can we stop the IO task ourselves? + ( + D::Connecting { .. }, + D::Stopping( + ClientStopReason::NegotiationFailed(..) 
+ | ClientStopReason::Replacing + | ClientStopReason::Disabled + | ClientStopReason::Fault(..), + ), + ) => true, + ( + D::Active | D::LiveRepair, + D::Stopping( + ClientStopReason::Fault(..) + | ClientStopReason::Replacing + | ClientStopReason::Disabled, + ), + ) => true, + (_, D::Stopping(ClientStopReason::Deactivated)) => { + matches!(up_state, U::Deactivating(..)) + } + + (D::Stopping(r), D::Connecting { mode, state }) => { + use ClientStopReason as R; + matches!( + (r, mode, state), + (R::Fault(..), C::Faulted, N::Start { .. }) + | ( + R::Deactivated | R::Disabled, + C::New, + N::Start { + auto_promote: false + } + ) + | ( + R::Replacing, + C::Replaced, + N::Start { auto_promote: true } + ) + | ( + R::Replacing, + C::New, + N::Start { + auto_promote: false + } + ) + | (R::NegotiationFailed(..), C::New, N::Start { .. }) + ) + } + + // When the upstairs is active, we can always spontaneously + // disconnect, which brings us to either Offline or Faulted + // depending on whether replay is valid + ( + _, + D::Connecting { + mode: C::Offline | C::Faulted, + state: N::Start { auto_promote: true }, + }, + ) => matches!(up_state, U::Active), + + // Anything not allowed is prohibited + _ => false, + } + } + /// Sets `repair_info` to `None` and increments `live_repair_aborted` pub(crate) fn clear_repair_state(&mut self) { self.repair_info = None; @@ -1902,6 +2014,43 @@ pub enum NegotiationState { LiveRepairReady, } +impl NegotiationState { + fn is_transition_valid( + mode: ConnectionMode, + prev_state: Self, + next_state: Self, + ) -> bool { + use ConnectionMode as C; + use NegotiationState as N; + + matches!( + (prev_state, next_state, mode), + (N::Start { auto_promote: true }, N::WaitForPromote, _) + | ( + N::Start { + auto_promote: false, + }, + N::WaitActive, + _, + ) + | (N::WaitActive, N::WaitForPromote, _) + | (N::WaitForPromote, N::WaitForRegionInfo, _) + | (N::WaitForRegionInfo, N::GetLastFlush, C::Offline) + | ( + N::WaitForRegionInfo, + N::GetExtentVersions, 
+ C::New | C::Faulted | C::Replaced, + ) + | (N::GetExtentVersions, N::WaitQuorum, C::New) + | (N::WaitQuorum, N::Reconcile, C::New) + | ( + N::GetExtentVersions, + N::LiveRepairReady, + C::Faulted | C::Replaced, + ) + ) + } +} /// Result value returned when negotiation is complete pub(crate) enum NegotiationResult { NotDone, diff --git a/upstairs/src/downstairs.rs b/upstairs/src/downstairs.rs index d81ddab52..e286a57de 100644 --- a/upstairs/src/downstairs.rs +++ b/upstairs/src/downstairs.rs @@ -442,11 +442,26 @@ impl Downstairs { /// Helper function to set all 3x clients as active, legally #[cfg(test)] pub fn force_active(&mut self) { + let up_state = UpstairsState::GoActive(BlockRes::dummy()); for cid in ClientId::iter() { - self.clients[cid].checked_state_transition( - &UpstairsState::Initializing, - DsState::Active, - ); + for state in [ + NegotiationState::WaitActive, + NegotiationState::WaitForPromote, + NegotiationState::WaitForRegionInfo, + NegotiationState::GetExtentVersions, + NegotiationState::WaitQuorum, + NegotiationState::Reconcile, + ] { + self.clients[cid].checked_state_transition( + &up_state, + DsState::Connecting { + state, + mode: ConnectionMode::New, + }, + ); + } + self.clients[cid] + .checked_state_transition(&up_state, DsState::Active); } } @@ -3622,12 +3637,7 @@ impl Downstairs { fn repair_test_all_active() -> Self { let mut ds = Self::test_default(); - for cid in ClientId::iter() { - ds.clients[cid].checked_state_transition( - &UpstairsState::Active, - DsState::Active, - ); - } + ds.force_active(); let mut ddef = RegionDefinition::default(); ddef.set_block_size(512); @@ -3646,15 +3656,7 @@ impl Downstairs { // Set one of the clients to want a repair let to_repair = ClientId::new(1); - ds.fault_client( - to_repair, - &UpstairsState::Active, - ClientFaultReason::RequestedFault, - ); - ds.clients[to_repair].checked_state_transition( - &UpstairsState::Active, - DsState::LiveRepair, - ); + test::move_to_live_repair(&mut ds, to_repair); // At 
this point you might think it makes sense to run // `self.start_live_repair(&UpstairsState::Active, gw, 3, 0);` @@ -4377,7 +4379,7 @@ pub(crate) mod test { downstairs::{LiveRepairData, LiveRepairState, ReconcileData}, live_repair::ExtentInfo, upstairs::UpstairsState, - BlockOpWaiter, ClientId, CrucibleError, DsState, ExtentFix, + BlockOpWaiter, BlockRes, ClientId, CrucibleError, DsState, ExtentFix, ExtentRepairIDs, IOState, IOop, ImpactedAddr, ImpactedBlocks, JobId, RawReadResponse, ReconcileIO, ReconcileIOState, ReconciliationId, SnapshotDetails, @@ -4456,7 +4458,10 @@ pub(crate) mod test { } /// Helper function to legally move the given client to live-repair - fn move_to_live_repair(ds: &mut Downstairs, to_repair: ClientId) { + pub(super) fn move_to_live_repair( + ds: &mut Downstairs, + to_repair: ClientId, + ) { to_live_repair_ready(ds, to_repair); ds.clients[to_repair].checked_state_transition( &UpstairsState::Active, @@ -4466,16 +4471,18 @@ pub(crate) mod test { fn set_all_reconcile(ds: &mut Downstairs) { let mode = ConnectionMode::New; + let up_state = UpstairsState::GoActive(BlockRes::dummy()); for cid in ClientId::iter() { for state in [ - NegotiationState::Start { auto_promote: true }, + NegotiationState::WaitActive, NegotiationState::WaitForPromote, NegotiationState::WaitForRegionInfo, NegotiationState::GetExtentVersions, + NegotiationState::WaitQuorum, NegotiationState::Reconcile, ] { ds.clients[cid].checked_state_transition( - &UpstairsState::Active, + &up_state, DsState::Connecting { state, mode }, ); } @@ -6136,7 +6143,6 @@ pub(crate) mod test { }, ), ])); - set_all_reconcile(&mut ds); // Send the first reconciliation req assert!(!ds.send_next_reconciliation_req()); diff --git a/upstairs/src/upstairs.rs b/upstairs/src/upstairs.rs index de5cd9cff..a87fa7018 100644 --- a/upstairs/src/upstairs.rs +++ b/upstairs/src/upstairs.rs @@ -2176,7 +2176,6 @@ pub(crate) mod test { })); let mode = ConnectionMode::Faulted; for state in [ - NegotiationState::Start { 
auto_promote: true }, NegotiationState::WaitForPromote, NegotiationState::WaitForRegionInfo, NegotiationState::GetExtentVersions, From a033afb28ea2e928437fe72d6523ec77cee27c24 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Thu, 12 Dec 2024 14:28:41 -0500 Subject: [PATCH 19/21] Add RUST_BACKTRACE=1 --- .github/buildomat/jobs/test-ds.sh | 1 + .github/buildomat/jobs/test-live-repair.sh | 1 + .github/buildomat/jobs/test-memory.sh | 1 + .github/buildomat/jobs/test-region-create.sh | 1 + .github/buildomat/jobs/test-repair.sh | 1 + .github/buildomat/jobs/test-replay.sh | 1 + .github/buildomat/jobs/test-up-encrypted.sh | 1 + .github/buildomat/jobs/test-up-unencrypted.sh | 1 + 8 files changed, 8 insertions(+) diff --git a/.github/buildomat/jobs/test-ds.sh b/.github/buildomat/jobs/test-ds.sh index ff217f781..eee478c73 100755 --- a/.github/buildomat/jobs/test-ds.sh +++ b/.github/buildomat/jobs/test-ds.sh @@ -39,6 +39,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 banner test_ds ptime -m bash "$input/scripts/test_ds.sh" diff --git a/.github/buildomat/jobs/test-live-repair.sh b/.github/buildomat/jobs/test-live-repair.sh index 61d702dc6..5dfcf9694 100644 --- a/.github/buildomat/jobs/test-live-repair.sh +++ b/.github/buildomat/jobs/test-live-repair.sh @@ -78,6 +78,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 echo "BINDIR is $BINDIR" echo "bindir contains:" diff --git a/.github/buildomat/jobs/test-memory.sh b/.github/buildomat/jobs/test-memory.sh index 6c5659d98..fb83051d5 100755 --- a/.github/buildomat/jobs/test-memory.sh +++ b/.github/buildomat/jobs/test-memory.sh @@ -39,6 +39,7 @@ for t in "$input/rbins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 banner setup pfexec plimit -n 9123456 $$ diff --git a/.github/buildomat/jobs/test-region-create.sh b/.github/buildomat/jobs/test-region-create.sh index c63f134f0..c82feebbc 100755 --- 
a/.github/buildomat/jobs/test-region-create.sh +++ b/.github/buildomat/jobs/test-region-create.sh @@ -39,6 +39,7 @@ for t in "$input/rbins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 banner region pfexec plimit -n 9123456 $$ diff --git a/.github/buildomat/jobs/test-repair.sh b/.github/buildomat/jobs/test-repair.sh index 48b1e6354..f66cd8721 100644 --- a/.github/buildomat/jobs/test-repair.sh +++ b/.github/buildomat/jobs/test-repair.sh @@ -45,6 +45,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 echo "Setup self timeout" # Give this test two hours to finish diff --git a/.github/buildomat/jobs/test-replay.sh b/.github/buildomat/jobs/test-replay.sh index a4bdfe9af..d87f05b93 100644 --- a/.github/buildomat/jobs/test-replay.sh +++ b/.github/buildomat/jobs/test-replay.sh @@ -43,6 +43,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 banner setup echo "Setup self timeout" diff --git a/.github/buildomat/jobs/test-up-encrypted.sh b/.github/buildomat/jobs/test-up-encrypted.sh index 8ff024c9e..b8a1b3f96 100644 --- a/.github/buildomat/jobs/test-up-encrypted.sh +++ b/.github/buildomat/jobs/test-up-encrypted.sh @@ -41,6 +41,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 # Give this test one hour to finish jobpid=$$; (sleep $(( 60 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & diff --git a/.github/buildomat/jobs/test-up-unencrypted.sh b/.github/buildomat/jobs/test-up-unencrypted.sh index ea4f74049..55ace65f9 100644 --- a/.github/buildomat/jobs/test-up-unencrypted.sh +++ b/.github/buildomat/jobs/test-up-unencrypted.sh @@ -41,6 +41,7 @@ for t in "$input/bins/"*.gz; do done export BINDIR=/var/tmp/bins +export RUST_BACKTRACE=1 # Give this test two hours to finish jobpid=$$; (sleep $(( 120 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & From 6a1bde5f3a11c303e70b2814fcba1fbce0239c8e Mon Sep 17 
00:00:00 2001 From: Matt Keeter Date: Thu, 12 Dec 2024 14:50:26 -0500 Subject: [PATCH 20/21] Allow Connecting -> Connecting transition --- upstairs/src/client.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index 781382b12..ca96fc659 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -972,6 +972,16 @@ impl DownstairsClient { use NegotiationState as N; use UpstairsState as U; match (prev_state, next_state) { + ( + D::Connecting { .. }, + D::Connecting { + state: N::Start { .. }, + .. + }, + ) => { + // restarting negotiation is allowed + true + } ( D::Connecting { state: prev_state, @@ -982,6 +992,7 @@ impl DownstairsClient { mode: next_mode, }, ) => { + // Check normal negotiation path if next_mode == C::New && matches!(up_state, U::Active) { return false; } From 26e7d501b33f47379e53601720353c2983e7654b Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Thu, 12 Dec 2024 15:15:56 -0500 Subject: [PATCH 21/21] Allow Replacing -> New with auto_promote = true --- upstairs/src/client.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index ca96fc659..e33d83e19 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -1057,13 +1057,7 @@ impl DownstairsClient { C::Replaced, N::Start { auto_promote: true } ) - | ( - R::Replacing, - C::New, - N::Start { - auto_promote: false - } - ) + | (R::Replacing, C::New, N::Start { .. }) | (R::NegotiationFailed(..), C::New, N::Start { .. }) ) }