From 354277e5a6943d3cb4d8262d5a2a77da2f8a8040 Mon Sep 17 00:00:00 2001 From: Matt Keeter Date: Tue, 3 Dec 2024 10:25:54 -0500 Subject: [PATCH] Update comments --- upstairs/src/client.rs | 58 ++++++++++++++++++++++--- upstairs/src/lib.rs | 97 +++++++++++------------------------------- 2 files changed, 76 insertions(+), 79 deletions(-) diff --git a/upstairs/src/client.rs b/upstairs/src/client.rs index db8743544..f33555f86 100644 --- a/upstairs/src/client.rs +++ b/upstairs/src/client.rs @@ -1813,6 +1813,47 @@ impl DownstairsClient { } /// Tracks client negotiation progress +/// +/// The exact path through negotiation depends on the [`ConnectionMode`]. +/// +/// There are three main paths, shown below: +/// +/// ```text +/// ┌───────┐ +/// │ Start ├────────┐ +/// └───┬───┘ │ +/// │ │ +/// ┌─────▼──────┐ │ +/// │ WaitActive │ │ auto-promote +/// └─────┬──────┘ │ +/// │ │ +/// ┌───────▼────────┐ │ +/// │ WaitForPromote ◄───┘ +/// └───────┬────────┘ +/// │ +/// ┌────────▼──────────┐ +/// │ WaitForRegionInfo │ +/// └──┬──────────────┬─┘ +/// Offline │ │ New / Faulted / Replaced +/// ┌──────▼─────┐ ┌────▼────────────┐ +/// │GetLastFlush│ │GetExtentVersions│ +/// └──────┬─────┘ └─┬─────────────┬─┘ +/// │ │ New │ Faulted / Replaced +/// │ ┌──────▼───┐ ┌────▼──────────┐ +/// │ │WaitQuorum│ │LiveRepairReady│ +/// │ └────┬─────┘ └────┬──────────┘ +/// │ │ │ +/// │ ┌────▼────┐ │ +/// │ │Reconcile│ │ +/// │ └────┬────┘ │ +/// │ │ │ +/// │ ┌───▼──┐ │ +/// └─────► Done ◄────────────┘ +/// └──────┘ +/// ``` +/// +/// `Done` isn't actually present in the state machine; it's indicated by +/// returning a [`NegotiationResult`] other than [`NegotiationResult::NotDone`]. 
#[derive( Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize, JsonSchema, )] @@ -1823,21 +1864,26 @@ pub enum NegotiationState { /// /// Once this message is heard, transitions to either `WaitActive` (if - /// `auto_promote` is `false`) or `WaitQuorum` (if `auto_promote` is `true`) + /// `auto_promote` is `false`) or `WaitForPromote` (if it is `true`) - Start { - auto_promote: bool, - }, + Start { auto_promote: bool }, /// Waiting for activation by the guest WaitActive, - /// Waiting for the minimum number of downstairs to be present. - WaitQuorum, - + /// Waiting to hear `YouAreNowActive` from the client WaitForPromote, + + /// Waiting to hear `RegionInfo` from the client WaitForRegionInfo, + + /// Waiting to hear `LastFlushAck` from the client GetLastFlush, + + /// Waiting to hear `ExtentVersions` from the client GetExtentVersions, + /// Waiting for the minimum number of downstairs to be present. + WaitQuorum, + /// Initial startup, downstairs are repairing from each other. Reconcile, diff --git a/upstairs/src/lib.rs b/upstairs/src/lib.rs index b409cc140..dc215e886 100644 --- a/upstairs/src/lib.rs +++ b/upstairs/src/lib.rs @@ -714,78 +714,24 @@ pub(crate) struct RawReadResponse { pub data: bytes::BytesMut, } -/* - * States of a downstairs - * - * This shows the different states a downstairs can be in from the point of - * view of the upstairs. - * - * Double line paths can only be taken if an upstairs is active and goes to - * deactivated. - * - * │ - * ┌──┐ ▼ - * bad│ │ │ - * version│ ┌▼───┴──────┐ - * └─┤ ╞═════◄══════════════════╗ - * ┌─────────────► New ╞═════◄════════════════╗ ║ - * │ ┌─────► ├─────◄──────┐ ║ ║ - * │ │ └────┬───┬──┘ │ ║ ║ - * │ │ ▼ └───►───┐ other │ ║ ║ - * │ bad│ ┌────┴──────┐ │ failures ║ ║ - * │ region│ │ Wait │ │ ▲ ║ ║ - * │ │ │ Active ├─►┐ │ │ ║ ║ - * │ │ └────┬──────┘ │ │ │ ║ ║ - * │ │ ┌────┴──────┐ │ └───────┤ ║ ║ - * │ │ │ Wait │ └─────────┤ ║ ║ - * │ └─────┤ Quorum ├──►─────────┤ ║ ║ - * │ └────┬──────┘ │ ║ ║ - * │ ........▼..........
│ ║ ║ - * │failed : ┌────┴──────┐ : │ ║ ║ - * │reconcile : │ Reconcile │ : │ ╔═╝ ║ - * └─────────────┤ ├──►─────────┘ ║ ║ - * : └────┬──────┘ : ║ ║ - * Not Active : │ : ▲ ▲ Not Active - * .............. . . . │. . . . ...................║...║............ - * Active ▼ ║ ║ Active - * ┌────┴──────┐ ┌──────────╨┐ ║ - * ┌─►─┤ Active ├─────►───┤Deactivated│ ║ - * │ │ │ ┌──────┤ ├─◄──────┐ - * │ └─┬───┬───┬─┘ │ └───────────┘ ║ │ - * │ ▼ ▼ ▲ ▲ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ │ │ │ ║ │ - * │ │ ▼ ▲ ▲ ║ │ - * │ │ │ │ │ ▲ │ - * │ │ ┌─┴───┴────┴┐ ┌────────────╨──┐ │ - * │ │ │ Offline │ │ Faulted │ │ - * │ │ │ ├─────►─┤ │ │ - * │ │ └───────────┘ └─┬─┬───────┬─┬─┘ │ - * │ │ ▲ ▲ ▼ ▲ ▲ - * │ └───────────►───────────┘ │ │ │ │ - * │ │ │ │ │ - * │ ┌────────┴─┐ ┌─┴─┴────┴─┐ - * └──────────────────────┤ Live ├─◄─┤ Live │ - * │ Repair │ │ Repair │ - * │ │ │ Ready │ - * └──────────┘ └──────────┘ - * - * - * The downstairs state can go to Disabled from any other state, as that - * transition happens when a message is received from the actual - * downstairs on the other side of the connection.. - * The only path back at that point is for the Upstairs (who will self - * deactivate when it detects this) is to go back to New and through - * the reconcile process. - * ┌───────────┐ - * │ Disabled │ - * └───────────┘ - */ +/// High-level states for a Downstairs +/// +/// The state machine for a Downstairs is relatively simple: +/// +/// ```text +/// ┌────────────┐ +/// ┌────► LiveRepair ├─────┐ +/// ┌─────────┴┐ └─────┬──────┘ ┌─▼──────┐ +/// │Connecting│ │ │Stopping│ +/// └─▲───────┬┘ ┌─────▼──────┐ └─▲────┬─┘ +/// │ └────► Active ├─────┘ │ +/// │ └─────┬──────┘ │ +/// │ │ │ +/// └─────────────────◄┴─────────────────┘ +/// ``` +/// +/// Complexity is hidden in the `Connecting` state, which wraps a +/// [`NegotiationState`] implementing the negotiation state machine. 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] @@ -806,6 +752,7 @@ pub enum DsState { /// The IO task for the client is being stopped Stopping(ClientStopReason), } + impl std::fmt::Display for DsState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -873,9 +820,13 @@ impl std::fmt::Display for DsState { #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "snake_case")] pub enum ConnectionMode { - Offline, + /// Connect through reconciliation once a quorum has come online New, + /// Replay cached jobs when reconnecting + Offline, + /// Reconnect through live-repair Faulted, + /// Reconnect through live-repair; the address is allowed to change Replaced, }