Skip to content

Commit

Permalink
don't declare time synchronization too early (#3872)
Browse files Browse the repository at this point in the history
This changes the logic for deciding when NTP is synchronized to match
that of `chronyc waitsync`; that is, if chrony has not selected a
reference source, do not consider ourselves synchronized.
  • Loading branch information
iliana authored Aug 11, 2023
1 parent f816500 commit 5b35c44
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 15 deletions.
17 changes: 12 additions & 5 deletions openapi/sled-agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -2418,10 +2418,16 @@
"type": "number",
"format": "double"
},
"skew": {
"description": "The estimated error bound on the frequency.",
"type": "number",
"format": "double"
"ip_addr": {
"description": "The NTP reference IP address.",
"type": "string",
"format": "ip"
},
"ref_id": {
"description": "The NTP reference ID.",
"type": "integer",
"format": "uint32",
"minimum": 0
},
"sync": {
"description": "The synchronization state of the sled, true when the system clock and the NTP clock are in sync (to within a small window).",
Expand All @@ -2430,7 +2436,8 @@
},
"required": [
"correction",
"skew",
"ip_addr",
"ref_id",
"sync"
]
},
Expand Down
8 changes: 5 additions & 3 deletions sled-agent/src/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -823,12 +823,14 @@ pub struct TimeSync {
/// The synchronization state of the sled, true when the system clock
/// and the NTP clock are in sync (to within a small window).
pub sync: bool,
// These could both be f32, but there is a problem with progenitor/typify
/// The NTP reference ID.
pub ref_id: u32,
/// The NTP reference IP address.
pub ip_addr: IpAddr,
// This could be f32, but there is a problem with progenitor/typify
// where, although the f32 correctly becomes "float" (and not "double") in
// the API spec, that "float" gets converted back to f64 when generating
// the client.
/// The estimated error bound on the frequency.
pub skew: f64,
/// The current offset between the NTP clock and system clock.
pub correction: f64,
}
Expand Down
7 changes: 6 additions & 1 deletion sled-agent/src/rack_setup/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,12 @@ impl ServiceInner {
);

let ts = client.timesync_get().await?.into_inner();
Ok(TimeSync { sync: ts.sync, skew: ts.skew, correction: ts.correction })
Ok(TimeSync {
sync: ts.sync,
ref_id: ts.ref_id,
ip_addr: ts.ip_addr,
correction: ts.correction,
})
}

async fn wait_for_timesync(
Expand Down
26 changes: 20 additions & 6 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ use tokio::sync::MutexGuard;
use tokio::task::JoinHandle;
use uuid::Uuid;

const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED);

#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Failed to initialize CockroachDb: {err}")]
Expand Down Expand Up @@ -2327,7 +2329,12 @@ impl ServiceManager {
if let Some(true) = self.inner.skip_timesync {
info!(self.inner.log, "Configured to skip timesync checks");
self.boottime_rewrite(existing_zones.values());
return Ok(TimeSync { sync: true, skew: 0.00, correction: 0.00 });
return Ok(TimeSync {
sync: true,
ref_id: 0,
ip_addr: IPV6_UNSPECIFIED,
correction: 0.00,
});
};

let ntp_zone_name =
Expand All @@ -2350,19 +2357,26 @@ impl ServiceManager {
let v: Vec<&str> = stdout.split(',').collect();

if v.len() > 9 {
let correction = f64::from_str(v[4])
let ref_id = u32::from_str_radix(v[0], 16)
.map_err(|_| Error::NtpZoneNotReady)?;
let skew = f64::from_str(v[9])
let ip_addr =
IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED);
let correction = f64::from_str(v[4])
.map_err(|_| Error::NtpZoneNotReady)?;

let sync = (skew != 0.0 || correction != 0.0)
&& correction.abs() <= 0.05;
// Per `chronyc waitsync`'s implementation, if either the
// reference IP address is not unspecified or the reference
// ID is not 0 or 0x7f7f0101, we are synchronized to a peer.
let peer_sync = !ip_addr.is_unspecified()
|| (ref_id != 0 && ref_id != 0x7f7f0101);

let sync = peer_sync && correction.abs() <= 0.05;

if sync {
self.boottime_rewrite(existing_zones.values());
}

Ok(TimeSync { sync, skew, correction })
Ok(TimeSync { sync, ref_id, ip_addr, correction })
} else {
Err(Error::NtpZoneNotReady)
}
Expand Down

0 comments on commit 5b35c44

Please sign in to comment.