Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal NTP: resolve boundary NTP sources from DNS in addition to being told explicitly #6050

Merged
merged 8 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion internal-dns/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
//!
//! This module provides types used to assemble that configuration.

use crate::names::{ServiceName, DNS_ZONE};
use crate::names::{ServiceName, BOUNDARY_NTP_DNS_NAME, DNS_ZONE};
use anyhow::{anyhow, ensure};
use core::fmt;
use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord};
Expand Down Expand Up @@ -407,6 +407,27 @@ impl DnsConfigBuilder {
(name, vec![DnsRecord::Aaaa(sled_ip)])
});

// Assemble the special boundary NTP name to support chrony on internal
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth adding a unit test for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm on the fence - the expanded test_blueprint_internal_dns_basic does check for this name. If you'd like a unit test specific to this crate, I can add one?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's worth it, mainly because I think it should just be 1-2 lines of code in test_builder_output() below (plus regenerate the expectorate file).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure enough! Added in 24b7562

// NTP zones.
//
// We leave this as `None` if there are no `BoundaryNtp` service zones,
// which omits it from the final set of records.
let boundary_ntp_records = self
.service_instances_zones
.get(&ServiceName::BoundaryNtp)
.map(|zone2port| {
let records = zone2port
.iter()
.map(|(zone, _port)| {
let zone_ip = self.zones.get(&zone).expect(
"service_backend_zone() ensures zones are defined",
);
DnsRecord::Aaaa(*zone_ip)
})
.collect::<Vec<DnsRecord>>();
(BOUNDARY_NTP_DNS_NAME.to_string(), records)
});

// Assemble the set of AAAA records for zones.
let zone_records = self.zones.into_iter().map(|(zone, zone_ip)| {
(zone.dns_name(), vec![DnsRecord::Aaaa(zone_ip)])
Expand Down Expand Up @@ -454,6 +475,7 @@ impl DnsConfigBuilder {

let all_records = sled_records
.chain(zone_records)
.chain(boundary_ntp_records)
.chain(srv_records_sleds)
.chain(srv_records_zones)
.collect();
Expand Down Expand Up @@ -593,6 +615,11 @@ mod test {
b.service_backend_zone(ServiceName::Oximeter, &zone2, 125).unwrap();
b.service_backend_zone(ServiceName::Oximeter, &zone3, 126).unwrap();

// Add a boundary NTP service to one of the zones; this will also
// populate the special `BOUNDARY_NTP_DNS_NAME`.
b.service_backend_zone(ServiceName::BoundaryNtp, &zone2, 127)
.unwrap();

// A sharded service
b.service_backend_sled(
ServiceName::SledAgent(sled1_uuid),
Expand Down
7 changes: 7 additions & 0 deletions internal-dns/src/names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@

use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};

/// Name for the special boundary NTP DNS name
///
/// chrony does not support SRV records. This name resolves to AAAA records for
/// each boundary NTP zone, and then we can point internal NTP chrony instances
/// at this name for it to find the boundary NTP zones.
pub const BOUNDARY_NTP_DNS_NAME: &str = "boundary-ntp";

/// Name for the control plane DNS zone
pub const DNS_ZONE: &str = "control-plane.oxide.internal";

Expand Down
17 changes: 17 additions & 0 deletions internal-dns/tests/output/internal-dns-zone.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ builder: "non_trivial"
"data": "::1:4"
}
],
"_boundary-ntp._tcp": [
{
"type": "SRV",
"data": {
"port": 127,
"prio": 0,
"target": "001de000-c04e-4000-8000-000000000002.host.control-plane.oxide.internal",
"weight": 0
}
}
],
"_nexus._tcp": [
{
"type": "SRV",
Expand Down Expand Up @@ -118,5 +129,11 @@ builder: "non_trivial"
"weight": 0
}
}
],
"boundary-ntp": [
{
"type": "AAAA",
"data": "::1:2"
}
]
}
68 changes: 60 additions & 8 deletions nexus/reconfigurator/execution/src/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,9 @@ mod test {
use crate::overridables::Overridables;
use crate::Sled;
use dns_service_client::DnsDiff;
use internal_dns::config::Host;
use internal_dns::config::Zone;
use internal_dns::names::BOUNDARY_NTP_DNS_NAME;
use internal_dns::resolver::Resolver;
use internal_dns::ServiceName;
use internal_dns::DNS_ZONE;
Expand Down Expand Up @@ -662,7 +665,7 @@ mod test {
})
.collect();

let blueprint_dns_zone = blueprint_internal_dns_config(
let mut blueprint_dns_zone = blueprint_internal_dns_config(
&blueprint,
&sleds_by_id,
&Default::default(),
Expand All @@ -686,6 +689,10 @@ mod test {
// 4. Our out-of-service zone does *not* appear in the DNS config,
// neither with an AAAA record nor in an SRV record.
//
// 5. The boundary NTP zones' IP addresses are mapped to AAAA records in
// the special boundary DNS name (in addition to having their normal
// zone DNS name -> AAAA record from 1).
//
// Together, this tells us that we have SRV records for all services,
// that those SRV records all point to at least one of the Omicron zones
// for that service, and that we correctly ignored zones that were not
Expand Down Expand Up @@ -720,6 +727,33 @@ mod test {
})
.collect();

// Prune the special boundary NTP DNS name out, collecting their IP
// addresses, and build a list of expected SRV targets to ensure these
// IPs show up both in the special boundary NTP DNS name and as their
// normal SRV records.
let boundary_ntp_ips = blueprint_dns_zone
.records
.remove(BOUNDARY_NTP_DNS_NAME)
.expect("missing boundary NTP DNS name")
.into_iter()
.map(|record| match record {
DnsRecord::Aaaa(ip) => ip,
_ => panic!("expected AAAA record; got {record:?}"),
});
let mut expected_boundary_ntp_srv_targets = boundary_ntp_ips
.map(|ip| {
let Some(zone_id) = omicron_zones_by_ip.get(&ip) else {
panic!("did not find zone ID for boundary NTP IP {ip}");
};
let name = Host::Zone(Zone::Other(*zone_id)).fqdn();
println!(
"Boundary NTP IP {ip} maps to expected \
SRV record target {name}"
);
name
})
.collect::<BTreeSet<_>>();

// Now go through all the DNS names that have AAAA records and remove
// any corresponding Omicron zone. While doing this, construct a set of
// the fully-qualified DNS names (i.e., with the zone name suffix
Expand Down Expand Up @@ -814,6 +848,16 @@ mod test {
]);

for (name, records) in &blueprint_dns_zone.records {
let mut this_kind = None;
let kinds_left: Vec<_> =
srv_kinds_expected.iter().copied().collect();
for kind in kinds_left {
if kind.dns_name() == *name {
srv_kinds_expected.remove(&kind);
this_kind = Some(kind);
}
}

let srvs: Vec<_> = records
.iter()
.filter_map(|dns_record| match dns_record {
Expand All @@ -828,19 +872,27 @@ mod test {
correspond to a name that points to any Omicron zone",
srv.target
);
}

let kinds_left: Vec<_> =
srv_kinds_expected.iter().copied().collect();
for kind in kinds_left {
if kind.dns_name() == *name {
srv_kinds_expected.remove(&kind);
if this_kind == Some(ServiceName::BoundaryNtp) {
assert!(
expected_boundary_ntp_srv_targets.contains(&srv.target),
"found boundary NTP SRV record with target {:?} \
that does not correspond to an expected boundary \
NTP zone",
srv.target,
);
expected_boundary_ntp_srv_targets.remove(&srv.target);
}
}
}

println!("SRV kinds with no records found: {:?}", srv_kinds_expected);
assert!(srv_kinds_expected.is_empty());

println!(
"Boundary NTP SRV targets not found: {:?}",
expected_boundary_ntp_srv_targets
);
assert!(expected_boundary_ntp_srv_targets.is_empty());
}

#[tokio::test]
Expand Down
16 changes: 10 additions & 6 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ use illumos_utils::zfs::ZONE_ZFS_RAMDISK_DATASET_MOUNTPOINT;
use illumos_utils::zone::AddressRequest;
use illumos_utils::zpool::{PathInPool, ZpoolName};
use illumos_utils::{execute, PFEXEC};
use internal_dns::names::BOUNDARY_NTP_DNS_NAME;
use internal_dns::names::DNS_ZONE;
use internal_dns::resolver::Resolver;
use itertools::Itertools;
use nexus_config::{ConfigDropshotWithTls, DeploymentConfig};
Expand Down Expand Up @@ -1994,15 +1996,17 @@ impl ServiceManager {
.add_property(
"boundary",
"boolean",
&is_boundary.to_string(),
is_boundary.to_string(),
)
.add_property(
"boundary_pool",
"astring",
format!("{BOUNDARY_NTP_DNS_NAME}.{DNS_ZONE}"),
);

for s in ntp_servers {
chrony_config = chrony_config.add_property(
"server",
"astring",
&s.to_string(),
);
chrony_config =
chrony_config.add_property("server", "astring", s);
}

let dns_client_service;
Expand Down
14 changes: 12 additions & 2 deletions smf/chrony-setup/manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,32 @@
</dependency>

<exec_method type='method' name='start'
exec='/opt/oxide/zone-setup-cli/bin/zone-setup chrony-setup -b %{config/boundary} -s %{config/server} -a %{config/allow}'
exec='/opt/oxide/zone-setup-cli/bin/zone-setup chrony-setup -b %{config/boundary} -p %{config/boundary_pool} -s %{config/server} -a %{config/allow}'
timeout_seconds='0'>
<method_context security_flags="aslr">
<method_credential user="root" group="root"
privileges="basic,file_chown" />
</method_context>
</exec_method>


<property_group name='startd' type='framework'>
<propval name='duration' type='astring' value='transient' />
</property_group>

<property_group name="config" type="application">
<!-- Whether this is a boundary or internal NTP zone -->
<propval name="boundary" type="boolean" value="false" />
<!--
DNS name for the pool of boundary NTP servers. (Only used if this is
an internal NTP zone.)
-->
<propval name="boundary_pool" type="astring" value="" />
jgallagher marked this conversation as resolved.
Show resolved Hide resolved
<!--
Upstream NTP server. May be specifid more than once. (At least one is
required.)
-->
<propval name="server" type="astring" value="" />
<!-- Allowed IPv6 range for clients (typically the rack subnet) -->
<propval name="allow" type="astring" value="" />
</property_group>

Expand Down
20 changes: 20 additions & 0 deletions zone-setup/src/bin/zone-setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ struct ChronySetupArgs {
/// allowed IPv6 range
#[arg(short, long)]
allow: Ipv6Net,
/// DNS name for the boundary NTP zone pool
#[arg(
short = 'p',
long,
value_parser = NonEmptyStringValueParser::default(),
)]
boundary_pool: String,
}

// The default clap parser for `serde_json::Value` is to wrap the argument in a
Expand Down Expand Up @@ -396,6 +403,9 @@ makestep 1.0 3
leapsecmode slew
maxslewrate 2708.333

# Refresh boundary NTP servers every two minutes instead of every two weeks
refresh 120

";

let boundary_ntp_tpl = "#
Expand Down Expand Up @@ -447,6 +457,7 @@ maxslewrate 2708.333
boundary: is_boundary,
servers,
allow,
boundary_pool,
} = args;

let mut new_config =
Expand All @@ -464,10 +475,19 @@ maxslewrate 2708.333
.expect("write to String is infallible");
}
} else {
// TODO-cleanup: Remove specific boundary NTP servers after R10 is cut;
// once all racks are setting up the boundary NTP pool we can drop
// individual server lines:
// https://github.com/oxidecomputer/omicron/issues/6261
for s in servers {
writeln!(&mut new_config, "server {s} iburst minpoll 0 maxpoll 4")
.expect("write to String is infallible");
}
writeln!(
&mut new_config,
"pool {boundary_pool} iburst maxdelay 0.1 maxsources 16",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: I haven't reviewed the change in the chrony config

)
.expect("write to String is infallible");
}

// We read the contents from the old configuration file if it existed
Expand Down
Loading