Skip to content

Commit

Permalink
Update user accounting database once time is synced (#2815)
Browse files Browse the repository at this point in the history
(PST within the zones notwithstanding, see
oxidecomputer/helios-omicron-brand#4)

This change rewrites the user accounting databases for all running zones
(including the GZ) once time is synchronised. It uses the
`/usr/platform/oxide/bin/tmpx` command which is available in very recent
stlouis. If that's missing, it will just carry on regardless, leaving the
system uptime showing 13k days as before.

```
{"msg":"Timesync for [fd00:1122:3344:101::1]:12345 TimeSync { sync: false, skew: 0.0, correction: 0.0 }","v":0,"name":"SledAgent","level":30,"time":"1986-12-28T00:15:45.669141687Z","hostname":"gimlet-sn06","pid":101256,"component":"RSS"}
{"msg":"Time is not yet synchronized","v":0,"name":"SledAgent","level":40,"time":"1986-12-28T00:15:45.669154267Z","hostname":"gimlet-sn06","pid":101256,"component":"RSS","error":"\"Time is synchronized on 0/1 sleds\""}
...
gimlet-sn06 # date; uptime
Sun Dec 28 00:26:49 UTC 1986
00:26:49    up 1 min(s),  2 users,  load average: 1.31, 1.54, 0.77
gimlet-sn06 # zlogin oxz_internal_dns 'date; uptime'
Sat Dec 27 16:27:07 PST 1986
16:27:07    up 1 min(s),  0 users,  load average: 0.00, 0.00, 0.00
...
{"msg":"Timesync for [fd00:1122:3344:101::1]:12345 TimeSync { sync: true, skew: 117.186, correction: 2.207e-5 }","v":0,"name":"SledAgent","level":30,"time":"2023-04-11T22:34:20.543455275Z","hostname":"gimlet-sn06","pid":101256,"component":"RSS"}
...
gimlet-sn06 # date; uptime
Tue Apr 11 23:28:19 UTC 2023
23:28:19    up 1 min(s),  2 users,  load average: 4.75, 2.14, 1.10
gimlet-sn06 # zlogin oxz_internal_dns 'date; uptime'
Tue Apr 11 16:28:23 PDT 2023
16:28:23    up 1 min(s),  0 users,  load average: 0.00, 0.00, 0.00
```
  • Loading branch information
citrus-it authored Apr 12, 2023
1 parent 8040d19 commit 1d7a852
Showing 1 changed file with 62 additions and 8 deletions.
70 changes: 62 additions & 8 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use illumos_utils::running_zone::{InstalledZone, RunningZone};
use illumos_utils::zfs::ZONE_ZFS_DATASET_MOUNTPOINT;
use illumos_utils::zone::AddressRequest;
use illumos_utils::zone::Zones;
use illumos_utils::{execute, PFEXEC};
use omicron_common::address::Ipv6Subnet;
use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT;
use omicron_common::address::CRUCIBLE_PANTRY_PORT;
Expand All @@ -56,11 +57,14 @@ use sled_hardware::underlay;
use sled_hardware::SledMode;
use slog::Logger;
use std::collections::HashSet;
use std::iter;
use std::iter::FromIterator;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::{SystemTime, UNIX_EPOCH};
use tokio::io::AsyncWriteExt;
use tokio::sync::oneshot;
use tokio::sync::Mutex;
Expand Down Expand Up @@ -244,6 +248,7 @@ pub struct ServiceManagerInner {
switch_zone: Mutex<SledLocalZone>,
sled_mode: SledMode,
skip_timesync: Option<bool>,
time_synced: AtomicBool,
sidecar_revision: String,
zones: Mutex<Vec<RunningZone>>,
underlay_vnic_allocator: VnicAllocator<Etherstub>,
Expand Down Expand Up @@ -279,6 +284,7 @@ impl ServiceManager {
/// - `bootstrap_etherstub`: Etherstub used to allocate bootstrap service vNICs.
/// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet).
/// - `skip_timesync`: If true, the sled always reports synced time.
/// - `time_synced`: If true, time sync was achieved.
/// - `sidecar_revision`: Rev of attached sidecar, if present.
/// - `switch_zone_bootstrap_address`: The bootstrap IP to use for the switch zone.
#[allow(clippy::too_many_arguments)]
Expand All @@ -302,6 +308,7 @@ impl ServiceManager {
switch_zone: Mutex::new(SledLocalZone::Disabled),
sled_mode,
skip_timesync,
time_synced: AtomicBool::new(false),
sidecar_revision,
zones: Mutex::new(vec![]),
underlay_vnic_allocator: VnicAllocator::new(
Expand Down Expand Up @@ -1204,14 +1211,59 @@ impl ServiceManager {
Ok(())
}

pub fn boottime_rewrite(&self, zones: &Vec<RunningZone>) {
if self
.inner
.time_synced
.compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
.is_err()
{
// Already done.
return;
}

let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("SystemTime before UNIX EPOCH");

info!(self.inner.log, "Setting boot time to {:?}", now);

let files: Vec<String> = zones
.iter()
.map(|z| z.root())
.chain(iter::once("".to_string()))
.flat_map(|r| {
[format!("{r}/var/adm/utmpx"), format!("{r}/var/adm/wtmpx")]
})
.collect();

for file in files {
let mut command = std::process::Command::new(PFEXEC);
let cmd = command.args(&[
"/usr/platform/oxide/bin/tmpx",
&format!("{}", now.as_secs()),
&file,
]);
match execute(cmd) {
Err(e) => {
warn!(self.inner.log, "Updating {} failed: {}", &file, e);
}
Ok(_) => {
info!(self.inner.log, "Updated {}", &file);
}
}
}
}

pub async fn timesync_get(&self) -> Result<TimeSync, Error> {
let existing_zones = self.inner.zones.lock().await;

if let Some(true) = self.inner.skip_timesync {
info!(self.inner.log, "Configured to skip timesync checks");
self.boottime_rewrite(&existing_zones);
return Ok(TimeSync { sync: true, skew: 0.00, correction: 0.00 });
};

let existing_zones = self.inner.zones.lock().await;

let ntp_zone_name =
InstalledZone::get_zone_name(&ZoneType::NTP.to_string(), None);

Expand All @@ -1236,12 +1288,14 @@ impl ServiceManager {
let skew = f64::from_str(v[9])
.map_err(|_| Error::NtpZoneNotReady)?;

Ok(TimeSync {
sync: (skew != 0.0 || correction != 0.0)
&& correction.abs() <= 0.05,
skew,
correction,
})
let sync = (skew != 0.0 || correction != 0.0)
&& correction.abs() <= 0.05;

if sync {
self.boottime_rewrite(&existing_zones);
}

Ok(TimeSync { sync, skew, correction })
} else {
Err(Error::NtpZoneNotReady)
}
Expand Down

0 comments on commit 1d7a852

Please sign in to comment.