From 17248a1c924e3fb030b04765441a23f3e1ea29eb Mon Sep 17 00:00:00 2001 From: Diwakar Sharma Date: Mon, 9 Dec 2024 13:34:17 +0000 Subject: [PATCH] fix(nexus): don't persist if child faults during nexus create When a nexus is being created with a single child, and that child enters the retire path before the nexus is open, the child gets persisted as unhealthy. As a result, the volume can never be attached later on. Signed-off-by: Diwakar Sharma --- io-engine/src/bdev/nexus/nexus_persistence.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/io-engine/src/bdev/nexus/nexus_persistence.rs b/io-engine/src/bdev/nexus/nexus_persistence.rs index 68c03e559..7aaf87845 100644 --- a/io-engine/src/bdev/nexus/nexus_persistence.rs +++ b/io-engine/src/bdev/nexus/nexus_persistence.rs @@ -103,6 +103,15 @@ impl<'n> Nexus<'n> { }; nexus_info.children.push(child_info); }); + // We started with this child because it was healthy in etcd, or isn't there at all. + // Being unhealthy here means it is undergoing a fault/retire before nexus is open. + if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy { + warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation"); + return Err(Error::NexusCreate { + name: self.name.clone(), + reason: "only child is unhealthy".to_string(), + }); + } } PersistOp::AddChild { child_uri, healthy } => { // Add the state of a new child. This should only be called