From 5215d850768f8a79160220bfd6441959a8a04064 Mon Sep 17 00:00:00 2001
From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com>
Date: Fri, 26 Jan 2024 17:48:08 -0600
Subject: [PATCH] background task for service zone nat (#4857)

Currently the logic for configuring NAT for service zones is deeply nested and
crosses sled-agent HTTP API boundaries. The cleanest way to deliver eventual
consistency for service zone NAT entries was to pull the zone information from
inventory and use that to generate NAT entries to reconcile against the
`ipv4_nat_entry` table. This covers us in the following scenarios:

### RSS:
* User provides configuration to RSS
* RSS process ultimately creates a sled plan and service plan
* Application of the service plan by sled-agents creates zones
* Zone creation makes direct calls to dendrite to configure NAT (it is the
  only way it can be done at this time)
* Eventually the Nexus zones are launched and handoff to Nexus is complete
* The inventory task runs, recording zone locations to the db
* The service zone NAT background task reads inventory from the db and uses
  the data to generate records for the `ipv4_nat_entry` table, then triggers a
  dendrite sync
* The sync is ultimately a no-op because the NAT entries already exist in
  dendrite (dendrite operations are idempotent)

### Cold boot:
* sled-agents create switch zones if they are managing a scrimlet, and
  subsequently create the zones written to their ledgers. This may result in
  direct calls to dendrite.
* Once Nexus is back up, inventory collection resumes
* The service zone NAT background task reads inventory from the db to
  reconcile entries in the `ipv4_nat_entry` table and then triggers a dendrite
  sync
* If NAT is out of date on dendrite, it will be updated on trigger

### Dendrite crash
* If dendrite crashes and restarts, it will immediately contact Nexus for a
  re-sync (pre-existing logic from earlier NAT RPW work)
* Service zone and instance NAT entries are now present in the RPW table, so
  all NAT entries will be restored

### Migration / Relocation of service zone
* A new zone gets created on a sled in the rack. A direct call to dendrite
  will be made (it uses the same pre-Nexus logic to create the zone).
* The inventory task will record the new location of the service zone
* The service zone NAT background task will use inventory to update the table,
  adding and removing the necessary NAT entries and triggering a dendrite
  update

Considerations
---
Because this relies on data from the inventory task, which runs on a periodic
timer (600s), and because this task also runs on a periodic timer (30s), there
may be some latency in picking up changes. A few potential avenues for
improvement:

* Plumb additional logic into service zone NAT configuration that enables
  direct updates to the `ipv4_nat_entry` table once Nexus is online. Of note,
  this would further bifurcate the logic of pre-Nexus and post-Nexus state
  management. At this moment, this seems like the most painful approach. An
  argument can be made that we ultimately should be lifting the NAT
  configuration logic _out_ of service zone creation instead.

* Decrease the timer for the inventory task. This is the simplest change;
  however, it would result in more frequent collection, increasing overhead. I
  do not know _how much_ this would increase overhead. Maybe it is negligible.

* Plumb in the ability to trigger the inventory collection task for
  interesting control plane events. This would let us keep the _relatively_
  infrequent timing intervals while still allowing an on-demand refresh when
  needed.
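For reviewers skimming the diff, the core of the new task is a set
reconciliation: the entries generated from inventory are compared against the
active rows in `ipv4_nat_entry`, missing ones are inserted, stale ones are
soft-deleted, and matching ones are left alone. A minimal sketch of that shape
(the `NatEntry` type and `reconcile` function here are illustrative stand-ins
with simplified fields, not the actual datastore API):

```rust
use std::collections::HashSet;

/// Illustrative stand-in for the fields that identify a service zone NAT
/// entry; the real task builds `Ipv4NatValues` (external address, port range,
/// sled address, VNI, MAC) from the inventory collection.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
struct NatEntry {
    external_address: std::net::Ipv4Addr,
    first_port: u16,
    last_port: u16,
}

/// One reconciliation pass: entries generated from inventory but not active
/// in the table are added, active entries that are no longer generated are
/// deleted, and entries present in both sets are untouched, so repeated
/// activations settle into a no-op.
fn reconcile(
    desired: &HashSet<NatEntry>,
    active: &HashSet<NatEntry>,
) -> (Vec<NatEntry>, Vec<NatEntry>) {
    let to_add = desired.difference(active).cloned().collect();
    let to_delete = active.difference(desired).cloned().collect();
    (to_add, to_delete)
}
```

In the patch this logic lives in the new `ipv4_nat_sync_service_zones`
datastore method, and the background task only triggers the dendrite RPW
update when at least one new record was inserted, which keeps the steady-state
30s activation cheap.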
Related --- Closes #4650 Extracted from #4822 --- common/src/address.rs | 6 + common/src/nexus_config.rs | 16 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + docs/how-to-run.adoc | 102 +++-- nexus/db-model/src/ipv4_nat_entry.rs | 2 +- nexus/db-model/src/ipv4net.rs | 1 + nexus/db-model/src/ipv6net.rs | 1 + nexus/db-model/src/macaddr.rs | 1 + nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/vni.rs | 10 +- .../src/db/datastore/ipv4_nat_entry.rs | 210 ++++++++++ nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 29 +- nexus/src/app/background/mod.rs | 1 + .../app/background/sync_service_zone_nat.rs | 362 ++++++++++++++++++ nexus/tests/config.test.toml | 1 + schema/crdb/29.0.0/up1.sql | 14 + schema/crdb/dbinit.sql | 17 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 21 files changed, 770 insertions(+), 31 deletions(-) create mode 100644 nexus/src/app/background/sync_service_zone_nat.rs create mode 100644 schema/crdb/29.0.0/up1.sql diff --git a/common/src/address.rs b/common/src/address.rs index 0c8df33868..65a6604daf 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -18,6 +18,12 @@ pub const AZ_PREFIX: u8 = 48; pub const RACK_PREFIX: u8 = 56; pub const SLED_PREFIX: u8 = 64; +/// maximum possible value for a tcp or udp port +pub const MAX_PORT: u16 = u16::MAX; + +/// minimum possible value for a tcp or udp port +pub const MIN_PORT: u16 = u16::MIN; + /// The amount of redundancy for internal DNS servers. /// /// Must be less than or equal to MAX_DNS_REDUNDANCY. diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index be4b05ffdf..dedd091d81 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -334,6 +334,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, + /// configuration for service zone nat sync task + pub sync_service_zone_nat: SyncServiceZoneNatConfig, } #[serde_as] @@ -376,6 +378,14 @@ pub struct NatCleanupConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SyncServiceZoneNatConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -517,6 +527,7 @@ mod test { }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; + use crate::nexus_config::SyncServiceZoneNatConfig; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -665,6 +676,7 @@ mod test { inventory.nkeep = 11 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -769,6 +781,9 @@ mod test { phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, + sync_service_zone_nat: SyncServiceZoneNatConfig { + period_secs: Duration::from_secs(30) + } }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -827,6 +842,7 @@ mod test { inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/dev-tools/omdb/tests/env.out 
b/dev-tools/omdb/tests/env.out index c08f592852..8cca1b063a 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -70,6 +70,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -139,6 +143,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -195,6 +203,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 65520ab59c..f291bbb6a0 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -264,6 +264,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -369,6 +373,13 @@ task: "phantom_disks" number of phantom disks deleted: 0 number of phantom disk delete errors: 0 +task: "service_zone_nat_tracker" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index f6d780ad72..c1f78a0521 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -498,41 +498,93 @@ Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[ oxide auth login --host http://192.168.1.21 ---- +=== Configure quotas for your silo + +Setting resource quotas is required before you can begin uploading images, provisioning instances, etc. +In this example we'll update the recovery silo so we can provision instances directly from it: + +[source, console] +---- +$ oxide api /v1/system/silos/recovery/quotas --method PUT --input - <>). +Here we will first create an ip pool for the recovery silo: [source,console] ----- -$ oxide ip-pool range add --pool default --first 192.168.1.31 --last 192.168.1.40 -success -IpPoolRange { - id: 4a61e65a-d96d-4c56-9cfd-dc1e44d9e99b, - ip_pool_id: 1b1289a7-cefe-4a7e-a8c9-d93330846301, - range: V4( - Ipv4Range { - first: 192.168.1.31, - last: 192.168.1.40, - }, - ), - time_created: 2023-08-02T16:31:43.679785Z, +--- +$ oxide api /v1/system/ip-pools --method POST --input - < CreateResult { + use db::schema::ipv4_nat_entry::dsl; + + let vni = nexus_db_model::Vni(Vni::SERVICES_VNI); + + // find all active nat entries with the services vni + let result: Vec = dsl::ipv4_nat_entry + .filter(dsl::vni.eq(vni)) + .filter(dsl::version_removed.is_null()) + .select(Ipv4NatEntry::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // determine what to keep and what to delete + let mut keep: Vec<_> = vec![]; + let mut delete: Vec<_> = vec![]; + + for db_entry in result.iter() { + let values = Ipv4NatValues { + external_address: db_entry.external_address, + first_port: db_entry.first_port, + last_port: db_entry.last_port, + sled_address: db_entry.sled_address, + vni: db_entry.vni, + mac: db_entry.mac, + }; + + if nat_entries.contains(&values) { + keep.push(values); + } else { + delete.push(db_entry) + } + } + + // delete entries that are not present in requested entries + for entry in delete { + if let Err(e) = self.ipv4_nat_delete(opctx, entry).await { + error!( + opctx.log, + "failed to delete service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + } + } + + // optimization: only attempt to add what is missing + let add = nat_entries.iter().filter(|entry| !keep.contains(entry)); + + let mut count = 0; + + // insert nat_entries + for entry in add { + if let Err(e) = + self.ensure_ipv4_nat_entry(opctx, entry.clone()).await + { + error!( + opctx.log, + "failed to ensure service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + continue; + } + count += 1; + } + + Ok(count) + } + pub async fn ipv4_nat_delete( &self, opctx: &OpContext, @@ -592,4 +678,128 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Test our ability to reconcile a set of service zone nat entries + #[tokio::test] + async fn ipv4_nat_sync_service_zones() { + let logctx = dev::test_setup_log("ipv4_nat_sync_service_zones"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 0 + ); + + // create two nat entries: + // 1. an entry should be deleted during the next sync + // 2. an entry that should be kept during the next sync + + let external_address = external::Ipv4Net( + ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(), + ); + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(), + ); + + // Add a nat entry. + let nat1 = Ipv4NatValues { + external_address: external_address.into(), + first_port: 0.into(), + last_port: 999.into(), + sled_address: sled_address.into(), + vni: Vni(external::Vni::SERVICES_VNI), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + let nat2 = Ipv4NatValues { + first_port: 1000.into(), + last_port: 1999.into(), + ..nat1 + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + datastore.ensure_ipv4_nat_entry(&opctx, nat2.clone()).await.unwrap(); + + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 2); + + // sync two nat entries: + // 1. a nat entry that already exists + // 2. a nat entry that does not already exist + + let nat3 = Ipv4NatValues { + first_port: 2000.into(), + last_port: 2999.into(), + ..nat2 + }; + + datastore + .ipv4_nat_sync_service_zones(&opctx, &[nat2.clone(), nat3.clone()]) + .await + .unwrap(); + + // we should have three nat entries in the db + // 1. the old one that was deleted during the last sync + // 2. 
the old one that "survived" the last sync + // 3. a new one that was added during the last sync + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 3); + + // nat2 and nat3 should not be soft deleted + for request in [nat2.clone(), nat3.clone()] { + assert!(db_entries.iter().any(|entry| { + entry.first_port == request.first_port + && entry.last_port == request.last_port + && entry.time_deleted.is_none() + })); + } + + // nat1 should be soft deleted + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_some() + && entry.version_removed.is_some() + })); + + // add nat1 back + // this simulates a zone leaving and then returning, i.e. when a sled gets restarted + datastore + .ipv4_nat_sync_service_zones( + &opctx, + &[nat1.clone(), nat2.clone(), nat3.clone()], + ) + .await + .unwrap(); + + // we should have four nat entries in the db + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 4); + + // there should be an active entry for nat1 again + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_none() + && entry.version_removed.is_none() + })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f13ea721b8..dcab2d9da1 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -105,6 +105,7 @@ inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d30d2162c4..49ac6d93e2 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -12,6 +12,7 @@ use super::external_endpoints; use super::inventory_collection; use super::nat_cleanup; use super::phantom_disks; +use super::sync_service_zone_nat::ServiceZoneNatTracker; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; @@ -56,6 +57,9 @@ pub struct BackgroundTasks { /// task handle for the task that detects phantom disks pub task_phantom_disks: common::TaskHandle, + + /// task handle for the service zone nat tracker + pub task_service_zone_nat_tracker: common::TaskHandle, } impl BackgroundTasks { @@ -106,6 +110,9 @@ impl BackgroundTasks { (task, watcher_channel) }; + let dpd_clients: Vec<_> = + dpd_clients.values().map(|client| client.clone()).collect(); + let nat_cleanup = { driver.register( "nat_v4_garbage_collector".to_string(), @@ -116,7 +123,7 @@ impl BackgroundTasks { config.nat_cleanup.period_secs, Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), - dpd_clients.values().map(|client| client.clone()).collect(), + dpd_clients.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -149,7 +156,8 @@ impl BackgroundTasks { // Background task: phantom disk detection let task_phantom_disks = { - let detector = phantom_disks::PhantomDiskDetector::new(datastore); + let detector = + phantom_disks::PhantomDiskDetector::new(datastore.clone()); let task = driver.register( String::from("phantom_disks"), @@ -163,6 +171,22 @@ impl BackgroundTasks { task }; + let task_service_zone_nat_tracker = { + driver.register( + "service_zone_nat_tracker".to_string(), + String::from( + "ensures service zone nat records are recorded in NAT RPW table", + ), + config.sync_service_zone_nat.period_secs, + Box::new(ServiceZoneNatTracker::new( + datastore.clone(), + dpd_clients.clone(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + BackgroundTasks { driver, task_internal_dns_config, @@ -174,6 +198,7 @@ impl BackgroundTasks { nat_cleanup, task_inventory_collection, task_phantom_disks, + task_service_zone_nat_tracker, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 70b20224d4..166fc2654b 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -14,6 +14,7 @@ mod inventory_collection; mod nat_cleanup; mod phantom_disks; mod status; +mod sync_service_zone_nat; pub use common::Driver; pub use common::TaskHandle; diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs new file mode 100644 index 0000000000..8e75f97d7a --- /dev/null +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -0,0 +1,362 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting changes to service zone locations and +//! 
updating the NAT rpw table accordingly + +use super::common::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Ipv4NatValues; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; +use omicron_common::address::{MAX_PORT, MIN_PORT}; +use omicron_common::api::external; +use serde_json::json; +use sled_agent_client::types::OmicronZoneType; +use std::net::{IpAddr, SocketAddr}; +use std::num::NonZeroU32; +use std::sync::Arc; + +// Minumum number of boundary NTP zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NTP_COUNT: usize = 1; + +// Minumum number of nexus zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NEXUS_COUNT: usize = 1; + +// Minumum number of external DNS zones that should be present in a valid +// set of service zone nat configurations. +const MIN_EXTERNAL_DNS_COUNT: usize = 1; + +/// Background task that ensures service zones have nat entries +/// persisted in the NAT RPW table +pub struct ServiceZoneNatTracker { + datastore: Arc, + dpd_clients: Vec>, +} + +impl ServiceZoneNatTracker { + pub fn new( + datastore: Arc, + dpd_clients: Vec>, + ) -> Self { + Self { datastore, dpd_clients } + } +} + +impl BackgroundTask for ServiceZoneNatTracker { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + // check inventory + let inventory = match self + .datastore + .inventory_get_latest_collection( + opctx, + NonZeroU32::new(u32::MAX).unwrap(), + ) + .await + { + Ok(inventory) => inventory, + Err(e) => { + error!( + &log, + "failed to collect inventory"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed collect inventory: \ + {:#}", + e + ) + }); + } + }; + + // generate set of Service Zone NAT entries + let collection = match inventory { + Some(c) => c, + // this could happen if we check the inventory table before the + // inventory job has finished running for the first time + None => { + warn!( + &log, + "inventory collection is None"; + ); + return json!({ + "error": "inventory collection is None" + }); + } + }; + + let mut ipv4_nat_values: Vec = vec![]; + let mut ntp_count = 0; + let mut nexus_count = 0; + let mut dns_count = 0; + + for (sled_id, zones_found) in collection.omicron_zones { + let (_, sled) = match LookupPath::new(opctx, &self.datastore) + .sled_id(sled_id) + .fetch() + .await + .context("failed to look up sled") + { + Ok(result) => result, + Err(e) => { + error!( + &log, + "failed to lookup sled by id"; + "id" => ?sled_id, + "error" => ?e, + ); + continue; + } + }; + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::new(*sled.ip, 128).unwrap(), + ); + + let zones_config: sled_agent_client::types::OmicronZonesConfig = + zones_found.zones; + let zones: Vec = + zones_config.zones; + + for zone in zones { + let zone_type: OmicronZoneType = zone.zone_type; + match zone_type { + OmicronZoneType::BoundaryNtp { + nic, snat_cfg, .. 
+ } => { + let external_ip = match snat_cfg.ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: snat_cfg.first_port.into(), + last_port: snat_cfg.last_port.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + ntp_count += 1; + } + OmicronZoneType::Nexus { nic, external_ip, .. } => { + let external_ip = match external_ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + nexus_count += 1; + }, + OmicronZoneType::ExternalDns { nic, dns_address, .. } => { + let socket_addr: SocketAddr = match dns_address.parse() { + Ok(value) => value, + Err(e) => { + error!( + &log, + "failed to parse value into socketaddr"; + "value" => dns_address, + "error" => ?e, + ); + continue; + } + }; + let external_ip = match socket_addr { + SocketAddr::V4(v4) => { + *v4.ip() + }, + SocketAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + }, + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + dns_count += 1; + }, + // we explictly list all cases instead of using a wildcard, + // that way if someone adds a new type to OmicronZoneType that + // requires NAT, they must come here to update this logic as + // well + OmicronZoneType::Clickhouse {..} => continue, + OmicronZoneType::ClickhouseKeeper {..} => continue, + OmicronZoneType::CockroachDb {..} => continue, + OmicronZoneType::Crucible {..} => continue, + OmicronZoneType::CruciblePantry {..} => continue, + OmicronZoneType::InternalNtp {..} => continue, + OmicronZoneType::InternalDns {..} => continue, + OmicronZoneType::Oximeter { ..} => continue, + } + } + } + + // if we make it this far this should not be empty: + // * nexus is running so we should at least have generated a nat value for it + // * nexus requies other services zones that require nat to come up first + if ipv4_nat_values.is_empty() { + error!( + &log, + "nexus is running but no service zone nat values could be generated from inventory"; + ); + return json!({ + "error": "nexus is running but no service zone nat values could be generated from 
inventory" + }); + } + + if dns_count < MIN_EXTERNAL_DNS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of dns zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of dns zones" + }); + } + + if ntp_count < MIN_NTP_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of ntp zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of ntp zones" + + }); + } + + if nexus_count < MIN_NEXUS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of nexus zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of nexus zones" + + }); + } + + // reconcile service zone nat entries + let result = match self.datastore.ipv4_nat_sync_service_zones(opctx, &ipv4_nat_values).await { + Ok(num) => num, + Err(e) => { + error!( + &log, + "failed to update service zone nat records"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed to update service zone nat records: \ + {:#}", + e + ) + }); + }, + }; + + // notify dpd if we've added any new records + if result > 0 { + for client in &self.dpd_clients { + if let Err(e) = client.ipv4_nat_trigger_update().await { + error!( + &log, + "failed to trigger dpd rpw workflow"; + "error" => ?e + ); + }; + } + } + + let rv = serde_json::to_value(&result).unwrap_or_else(|error| { + json!({ + "error": + format!( + "failed to serialize final value: {:#}", + error + ) + }) + }); + + rv + } + .boxed() + } +} diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a4436234f0..476b8fe6c8 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -99,6 +99,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/schema/crdb/29.0.0/up1.sql b/schema/crdb/29.0.0/up1.sql new file mode 100644 index 0000000000..a213380944 --- /dev/null +++ b/schema/crdb/29.0.0/up1.sql @@ -0,0 +1,14 @@ +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 86d1340379..6ff92acfa4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3383,6 +3383,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3390,7 +3405,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '28.0.0', NULL) + ( TRUE, NOW(), NOW(), '29.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index d330f32ab6..d84bf8d4b0 100644 --- 
a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index cbd4851613..01206655f0 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds.