From 5b35c448e688bb1d9bb27fdd08144be804abdf87 Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Fri, 11 Aug 2023 10:34:16 -0500 Subject: [PATCH] don't declare time synchronization too early (#3872) This changes the logic for deciding when NTP is synchronized to match that of `chronyc waitsync`; that is, if chrony has not selected a reference source, do not consider ourselves synchronized. --- openapi/sled-agent.json | 17 ++++++++++++----- sled-agent/src/params.rs | 8 +++++--- sled-agent/src/rack_setup/service.rs | 7 ++++++- sled-agent/src/services.rs | 26 ++++++++++++++++++++------ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 2fe887f2fe..5e34227a25 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2418,10 +2418,16 @@ "type": "number", "format": "double" }, - "skew": { - "description": "The estimated error bound on the frequency.", - "type": "number", - "format": "double" + "ip_addr": { + "description": "The NTP reference IP address.", + "type": "string", + "format": "ip" + }, + "ref_id": { + "description": "The NTP reference ID.", + "type": "integer", + "format": "uint32", + "minimum": 0 }, "sync": { "description": "The synchronization state of the sled, true when the system clock and the NTP clock are in sync (to within a small window).", @@ -2430,7 +2436,8 @@ }, "required": [ "correction", - "skew", + "ip_addr", + "ref_id", "sync" ] }, diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 64a3bf520a..fa4eab265f 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -823,12 +823,14 @@ pub struct TimeSync { /// The synchronization state of the sled, true when the system clock /// and the NTP clock are in sync (to within a small window). pub sync: bool, - // These could both be f32, but there is a problem with progenitor/typify + /// The NTP reference ID. + pub ref_id: u32, + /// The NTP reference IP address. + pub ip_addr: IpAddr, + // This could be f32, but there is a problem with progenitor/typify // where, although the f32 correctly becomes "float" (and not "double") in // the API spec, that "float" gets converted back to f64 when generating // the client. - /// The estimated error bound on the frequency. - pub skew: f64, /// The current offset between the NTP clock and system clock. pub correction: f64, } diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index a445cec8cd..d127512aa6 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -451,7 +451,12 @@ impl ServiceInner { ); let ts = client.timesync_get().await?.into_inner(); - Ok(TimeSync { sync: ts.sync, skew: ts.skew, correction: ts.correction }) + Ok(TimeSync { + sync: ts.sync, + ref_id: ts.ref_id, + ip_addr: ts.ip_addr, + correction: ts.correction, + }) } async fn wait_for_timesync( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 670a7d7b69..7a2224bb9d 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -109,6 +109,8 @@ use tokio::sync::MutexGuard; use tokio::task::JoinHandle; use uuid::Uuid; +const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Failed to initialize CockroachDb: {err}")] @@ -2327,7 +2329,12 @@ impl ServiceManager { if let Some(true) = self.inner.skip_timesync { info!(self.inner.log, "Configured to skip timesync checks"); self.boottime_rewrite(existing_zones.values()); - return Ok(TimeSync { sync: true, skew: 0.00, correction: 0.00 }); + return Ok(TimeSync { + sync: true, + ref_id: 0, + ip_addr: IPV6_UNSPECIFIED, + correction: 0.00, + }); }; let ntp_zone_name = @@ -2350,19 +2357,26 @@ impl ServiceManager { let v: Vec<&str> = stdout.split(',').collect(); if v.len() > 9 { - let correction = f64::from_str(v[4]) + let ref_id = u32::from_str_radix(v[0], 16) .map_err(|_| Error::NtpZoneNotReady)?; - let skew = f64::from_str(v[9]) + let ip_addr = + IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED); + let correction = f64::from_str(v[4]) .map_err(|_| Error::NtpZoneNotReady)?; - let sync = (skew != 0.0 || correction != 0.0) - && correction.abs() <= 0.05; + // Per `chronyc waitsync`'s implementation, if either the + // reference IP address is not unspecified or the reference + // ID is not 0 or 0x7f7f0101, we are synchronized to a peer. + let peer_sync = !ip_addr.is_unspecified() + || (ref_id != 0 && ref_id != 0x7f7f0101); + + let sync = peer_sync && correction.abs() <= 0.05; if sync { self.boottime_rewrite(existing_zones.values()); } - Ok(TimeSync { sync, skew, correction }) + Ok(TimeSync { sync, ref_id, ip_addr, correction }) } else { Err(Error::NtpZoneNotReady) }