diff --git a/Cargo.lock b/Cargo.lock
index 38fa185c59..437e0c1ab9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1152,6 +1152,18 @@ dependencies = [
  "instant",
 ]
 
+[[package]]
+name = "fatfs"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e18f80a87439240dac45d927fd8f8081b6f1e34c03e97271189fa8a8c2e96c8f"
+dependencies = [
+ "bitflags",
+ "byteorder",
+ "chrono",
+ "log",
+]
+
 [[package]]
 name = "ff"
 version = "0.10.1"
@@ -2401,6 +2413,7 @@ dependencies = [
  "diesel-dtrace",
  "dropshot",
  "expectorate",
+ "fatfs",
  "futures",
  "headers",
  "hex",
@@ -2414,6 +2427,7 @@ dependencies = [
  "newtype_derive",
  "nexus-test-utils",
  "nexus-test-utils-macros",
+ "num-integer",
  "omicron-common",
  "omicron-rpaths",
  "omicron-sled-agent",
diff --git a/common/src/sql/dbinit.sql b/common/src/sql/dbinit.sql
index 1703df542d..5d7b9f7e71 100644
--- a/common/src/sql/dbinit.sql
+++ b/common/src/sql/dbinit.sql
@@ -303,6 +303,9 @@ CREATE TABLE omicron.public.instance (
     /* Every Instance is in exactly one Project at a time. */
     project_id UUID NOT NULL,
 
+    /* user data for instance initialization systems (e.g. cloud-init) */
+    user_data BYTES NOT NULL,
+
     /*
      * TODO Would it make sense for the runtime state to live in a separate
      * table?
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index 2e3df99c3e..8c8ff68486 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -18,6 +18,7 @@ cookie = "0.16"
 crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "945daedb88cefa790f1d994b3a038b8fa9ac514a" }
 # Tracking pending 2.0 version.
 diesel = { git = "https://github.com/diesel-rs/diesel", rev = "ce77c382", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] }
+fatfs = "0.3.5"
 futures = "0.3.21"
 headers = "0.3.7"
 hex = "0.4.3"
@@ -30,6 +31,7 @@ libc = "0.2.123"
 macaddr = { version = "1.0.1", features = [ "serde_std" ]}
 mime_guess = "2.0.4"
 newtype_derive = "0.1.6"
+num-integer = "0.1.44"
 oso = "0.26"
 oximeter-client = { path = "../oximeter-client" }
 oximeter-db = { path = "../oximeter/db/" }
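Both new nexus dependencies serve the `cidata` module introduced below: `fatfs` writes the FAT filesystem image, and `num-integer` supplies `Integer::div_ceil` for the sector arithmetic. A minimal standalone sketch (not part of the diff) of the rounding behavior the sizing math relies on:

```rust
use num_integer::Integer;

fn main() {
    // div_ceil rounds up: a 513-byte file needs two 512-byte sectors,
    // an exactly-512-byte file needs one, and an empty file needs none.
    // (UFCS calls the trait method unambiguously, avoiding the inherent
    // usize::div_ceil added in later Rust releases.)
    assert_eq!(Integer::div_ceil(&513usize, &512), 2);
    assert_eq!(Integer::div_ceil(&512usize, &512), 1);
    assert_eq!(Integer::div_ceil(&0usize, &512), 0);
}
```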
diff --git a/nexus/src/cidata.rs b/nexus/src/cidata.rs
new file mode 100644
index 0000000000..4daf9cdff9
--- /dev/null
+++ b/nexus/src/cidata.rs
@@ -0,0 +1,105 @@
+use crate::db::{identity::Resource, model::Instance};
+use fatfs::{FatType, FileSystem, FormatVolumeOptions, FsOptions};
+use num_integer::Integer;
+use omicron_common::api::external::Error;
+use serde::Serialize;
+use std::io::{self, Cursor, Write};
+use uuid::Uuid;
+
+pub const MAX_USER_DATA_BYTES: usize = 32 * 1024; // 32 KiB
+
+impl Instance {
+    pub fn generate_cidata(&self) -> Result<Vec<u8>, Error> {
+        // cloud-init meta-data is YAML, but YAML is a strict superset of JSON.
+        let meta_data = serde_json::to_vec(&MetaData {
+            instance_id: self.id(),
+            local_hostname: &self.runtime().hostname,
+            public_keys: &[], // TODO
+        })
+        .map_err(|_| Error::internal_error("failed to serialize meta-data"))?;
+        let cidata =
+            build_vfat(&meta_data, &self.user_data).map_err(|err| {
+                Error::internal_error(&format!(
+                    "failed to create cidata volume: {}",
+                    err
+                ))
+            })?;
+        Ok(cidata)
+    }
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "kebab-case")]
+struct MetaData<'a> {
+    instance_id: Uuid,
+    local_hostname: &'a str,
+    public_keys: &'a [String],
+}
+
+fn build_vfat(meta_data: &[u8], user_data: &[u8]) -> io::Result<Vec<u8>> {
+    let file_sectors =
+        meta_data.len().div_ceil(&512) + user_data.len().div_ceil(&512);
+    // vfat can hold more data than this, but we don't expect to ever need
+    // that for cloud-init purposes.
+    if file_sectors > 512 {
+        return Err(io::Error::new(io::ErrorKind::Other, "too much vfat data"));
+    }
+
+    // https://github.com/oxidecomputer/omicron/pull/911#discussion_r851354213
+    // If we're storing < 170 KiB of clusters, the FAT overhead is 35 sectors;
+    // if we're storing < 341 KiB of clusters, the overhead is 37. With a limit
+    // of 512 sectors (error check above), we can assume an overhead of 37.
+    // Additionally, fatfs refuses to format a disk that is smaller than 42
+    // sectors.
+    let sectors = 42.max(file_sectors + 37);
+
+    let mut disk = Cursor::new(vec![0; sectors * 512]);
+    fatfs::format_volume(
+        &mut disk,
+        FormatVolumeOptions::new()
+            .bytes_per_cluster(512)
+            .fat_type(FatType::Fat12)
+            .volume_label(*b"cidata     "),
+    )?;
+
+    {
+        let fs = FileSystem::new(&mut disk, FsOptions::new())?;
+        let root_dir = fs.root_dir();
+        for (file, data) in [("meta-data", meta_data), ("user-data", user_data)]
+        {
+            if !data.is_empty() {
+                let mut file = root_dir.create_file(file)?;
+                file.write_all(data)?;
+            }
+        }
+    }
+
+    Ok(disk.into_inner())
+}
+
+#[cfg(test)]
+mod tests {
+    /// the fatfs crate has some unfortunate panics if you ask it to do
+    /// incredibly stupid things, like format an empty disk or create a
+    /// filesystem with an invalid cluster size.
+    ///
+    /// to ensure that our math for the filesystem size is correct, and also to
+    /// ensure that we don't ask fatfs to do incredibly stupid things, this
+    /// test checks that `build_vfat` works on a representative sample of weird
+    /// file sizes. (32 KiB is our enforced limit for user_data, so push it a
+    /// little further.)
+    #[test]
+    fn build_vfat_works_with_arbitrarily_sized_input() {
+        let upper = crate::cidata::MAX_USER_DATA_BYTES + 4096;
+        // somewhat arbitrarily-chosen prime numbers near 1 KiB and 256 bytes
+        for md_size in (0..upper).step_by(1019) {
+            for ud_size in (0..upper).step_by(269) {
+                assert!(super::build_vfat(
+                    &vec![0x5a; md_size],
+                    &vec![0xa5; ud_size]
+                )
+                .is_ok());
+            }
+        }
+    }
+}
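To make the sizing comments above concrete: at the 512-file-sector cap enforced by the error check, the image is 512 + 37 = 549 sectors (≈274.5 KiB), and two empty files still yield the 42-sector (21 KiB) floor that fatfs is willing to format. A standalone sketch (not part of the diff) mirroring the arithmetic in `build_vfat`:

```rust
fn image_size_bytes(meta_data_len: usize, user_data_len: usize) -> usize {
    // one 512-byte sector per started 512-byte chunk of each file,
    // plus 37 sectors of FAT12 overhead, with a 42-sector floor
    let file_sectors =
        (meta_data_len + 511) / 512 + (user_data_len + 511) / 512;
    42usize.max(file_sectors + 37) * 512
}

fn main() {
    assert_eq!(image_size_bytes(0, 0), 42 * 512); // 21 KiB floor
    assert_eq!(image_size_bytes(512, 32 * 1024), (1 + 64 + 37) * 512);
    assert_eq!(image_size_bytes(0, 512 * 512), 549 * 512); // worst case
}
```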
diff --git a/nexus/src/db/model.rs b/nexus/src/db/model.rs
index dfd268a41a..4fee472132 100644
--- a/nexus/src/db/model.rs
+++ b/nexus/src/db/model.rs
@@ -1112,6 +1112,9 @@ pub struct Instance {
     /// id for the project containing this Instance
     pub project_id: Uuid,
 
+    /// user data for instance initialization systems (e.g. cloud-init)
+    pub user_data: Vec<u8>,
+
     /// runtime state of the Instance
     #[diesel(embed)]
     pub runtime_state: InstanceRuntimeState,
@@ -1126,7 +1129,12 @@ impl Instance {
     ) -> Self {
         let identity =
             InstanceIdentity::new(instance_id, params.identity.clone());
-        Self { identity, project_id, runtime_state: runtime }
+        Self {
+            identity,
+            project_id,
+            user_data: params.user_data.clone(),
+            runtime_state: runtime,
+        }
     }
 
     pub fn runtime(&self) -> &InstanceRuntimeState {
diff --git a/nexus/src/db/schema.rs b/nexus/src/db/schema.rs
index 92395adae4..b0f7860bc9 100644
--- a/nexus/src/db/schema.rs
+++ b/nexus/src/db/schema.rs
@@ -68,6 +68,7 @@ table! {
         time_modified -> Timestamptz,
         time_deleted -> Nullable<Timestamptz>,
         project_id -> Uuid,
+        user_data -> Binary,
         state -> crate::db::model::InstanceStateEnum,
         time_state_updated -> Timestamptz,
         state_generation -> Int8,
diff --git a/nexus/src/external_api/params.rs b/nexus/src/external_api/params.rs
index ce6721bbdf..9a83bab95c 100644
--- a/nexus/src/external_api/params.rs
+++ b/nexus/src/external_api/params.rs
@@ -138,6 +138,15 @@ pub struct InstanceCreate {
     pub memory: ByteCount,
     pub hostname: String, // TODO-cleanup different type?
 
+    /// User data for instance initialization systems (such as cloud-init).
+    /// Must be a Base64-encoded string, as specified in RFC 4648 § 4 (+ and /
+    /// characters with padding). Maximum 32 KiB unencoded data.
+    // TODO: this should emit `"format": "byte"`, but progenitor doesn't
+    // understand that yet.
+    #[schemars(default, with = "String")]
+    #[serde(default, with = "serde_user_data")]
+    pub user_data: Vec<u8>,
+
     /// The network interfaces to be created for this instance.
     #[serde(default)]
     pub network_interfaces: InstanceNetworkInterfaceAttachment,
@@ -147,6 +156,43 @@ pub struct InstanceCreate {
     pub disks: Vec<InstanceDiskAttachment>,
 }
 
+mod serde_user_data {
+    use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
+
+    pub fn serialize<S>(
+        data: &Vec<u8>,
+        serializer: S,
+    ) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        base64::encode(data).serialize(serializer)
+    }
+
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Vec<u8>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        match base64::decode(<&str>::deserialize(deserializer)?) {
+            Ok(buf) => {
+                // if you change this, also update the stress test in crate::cidata
+                if buf.len() > crate::cidata::MAX_USER_DATA_BYTES {
+                    Err(D::Error::invalid_length(
+                        buf.len(),
+                        &"less than 32 KiB",
+                    ))
+                } else {
+                    Ok(buf)
+                }
+            }
+            Err(_) => Err(D::Error::invalid_value(
+                serde::de::Unexpected::Other("invalid base64 string"),
+                &"a valid base64 string",
+            )),
+        }
+    }
+}
+
 /// Migration parameters for an [`Instance`](omicron_common::api::external::Instance)
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
 pub struct InstanceMigrate {
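On the wire, `user_data` travels as a base64 string, so clients encode their cloud-config before building the create request. A hypothetical client-side sketch, using the same `base64` crate nexus decodes with:

```rust
fn main() {
    let cloud_config =
        "#cloud-config\nsystem_info:\n  default_user:\n    name: oxide\n";
    // this string becomes the value of the JSON body's "user_data" field
    let encoded = base64::encode(cloud_config);
    // RFC 4648 § 4 alphabet with padding, as the doc comment above requires
    assert!(encoded
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || "+/=".contains(c)));
}
```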
diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs
index befbf7102d..257db4cdff 100644
--- a/nexus/src/lib.rs
+++ b/nexus/src/lib.rs
@@ -15,6 +15,7 @@
 pub mod authn; // Public only for testing
 pub mod authz; // Public for documentation examples
+mod cidata;
 pub mod config; // Public for testing
 pub mod context; // Public for documentation examples
 pub mod db; // Public for documentation examples
diff --git a/nexus/src/nexus.rs b/nexus/src/nexus.rs
index bd05aee1c5..23932d606f 100644
--- a/nexus/src/nexus.rs
+++ b/nexus/src/nexus.rs
@@ -1711,6 +1711,9 @@ impl Nexus {
             ),
             nics: nics.iter().map(|nic| nic.clone().into()).collect(),
             disks: disk_reqs,
+            cloud_init_bytes: Some(base64::encode(
+                db_instance.generate_cidata()?,
+            )),
         };
 
         let sa = self.instance_sled(&db_instance).await?;
diff --git a/nexus/src/sagas.rs b/nexus/src/sagas.rs
index db7e8801a1..8f46710815 100644
--- a/nexus/src/sagas.rs
+++ b/nexus/src/sagas.rs
@@ -973,6 +973,8 @@ async fn sim_instance_migrate(
         nics: vec![],
         // TODO: populate disks
         disks: vec![],
+        // TODO: populate cloud init bytes
+        cloud_init_bytes: None,
     };
     let target = InstanceRuntimeStateRequested {
         run_state: InstanceStateRequested::Migrating,
diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs
index a6c9fc655d..015883a29d 100644
--- a/nexus/test-utils/src/resource_helpers.rs
+++ b/nexus/test-utils/src/resource_helpers.rs
@@ -162,6 +162,9 @@ pub async fn create_instance(
         ncpus: InstanceCpuCount(4),
         memory: ByteCount::from_mebibytes_u32(256),
         hostname: String::from("the_host"),
+        user_data:
+            b"#cloud-config\nsystem_info:\n  default_user:\n    name: oxide"
+                .to_vec(),
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: vec![],
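For an instance created by the test helper above, the `meta-data` file that `generate_cidata` emits is kebab-case JSON, which cloud-init accepts because YAML is a superset of JSON. A sketch of the wire shape, reproducing the private `MetaData` struct from `nexus/src/cidata.rs` with a placeholder instance id:

```rust
use serde::Serialize;
use uuid::Uuid;

// local copy of the (private) MetaData struct, shown only to illustrate
// the serialized shape
#[derive(Serialize)]
#[serde(rename_all = "kebab-case")]
struct MetaData<'a> {
    instance_id: Uuid,
    local_hostname: &'a str,
    public_keys: &'a [String],
}

fn main() {
    let md = MetaData {
        instance_id: Uuid::nil(), // placeholder id
        local_hostname: "the_host",
        public_keys: &[],
    };
    assert_eq!(
        serde_json::to_string(&md).unwrap(),
        "{\"instance-id\":\"00000000-0000-0000-0000-000000000000\",\
         \"local-hostname\":\"the_host\",\"public-keys\":[]}"
    );
}
```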
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index 0cc99c4b96..cc31c02c04 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -181,6 +181,7 @@ lazy_static! {
         ncpus: InstanceCpuCount(1),
         memory: ByteCount::from_gibibytes_u32(16),
         hostname: String::from("demo-instance"),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: vec![],
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 81970fd7f2..3b2ead4b79 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -146,6 +146,7 @@ async fn test_instances_create_reboot_halt(
         ncpus: instance.ncpus,
         memory: instance.memory,
         hostname: instance.hostname.clone(),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: vec![],
@@ -563,6 +564,7 @@ async fn test_instance_create_saga_removes_instance_database_record(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("inst"),
+        user_data: vec![],
         network_interfaces: interface_params.clone(),
         disks: vec![],
     };
@@ -584,6 +586,7 @@ async fn test_instance_create_saga_removes_instance_database_record(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("inst2"),
+        user_data: vec![],
         network_interfaces: interface_params,
         disks: vec![],
     };
@@ -669,6 +672,7 @@ async fn test_instance_with_single_explicit_ip_address(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nic-test"),
+        user_data: vec![],
         network_interfaces: interface_params,
         disks: vec![],
     };
@@ -785,6 +789,7 @@ async fn test_instance_with_new_custom_network_interfaces(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nic-test"),
+        user_data: vec![],
         network_interfaces: interface_params,
         disks: vec![],
     };
@@ -878,6 +883,7 @@ async fn test_instance_create_delete_network_interface(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nic-test"),
+        user_data: vec![],
         network_interfaces: params::InstanceNetworkInterfaceAttachment::None,
         disks: vec![],
     };
@@ -1061,6 +1067,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nic-test"),
+        user_data: vec![],
         network_interfaces: interface_params,
         disks: vec![],
     };
@@ -1134,6 +1141,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) {
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nfs"),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: vec![params::InstanceDiskAttachment::Attach(
             params::InstanceDiskAttach {
@@ -1229,6 +1237,7 @@ async fn test_attach_eight_disks_to_instance(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nfs"),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: (0..8)
             .map(|i| {
@@ -1334,6 +1343,7 @@ async fn test_cannot_attach_nine_disks_to_instance(
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nfs"),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: (0..9)
             .map(|i| {
@@ -1460,6 +1470,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) {
         ncpus: InstanceCpuCount::try_from(2).unwrap(),
         memory: ByteCount::from_mebibytes_u32(4),
         hostname: String::from("nfs"),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: (0..8)
             .map(|i| {
diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs
index 9c8279d852..8089660528 100644
--- a/nexus/tests/integration_tests/subnet_allocation.rs
+++ b/nexus/tests/integration_tests/subnet_allocation.rs
@@ -39,6 +39,7 @@ async fn create_instance_expect_failure(
         ncpus: InstanceCpuCount(1),
         memory: ByteCount::from_mebibytes_u32(256),
         hostname: name.to_string(),
+        user_data: vec![],
         network_interfaces:
             params::InstanceNetworkInterfaceAttachment::Default,
         disks: vec![],
     };
diff --git a/openapi/nexus.json b/openapi/nexus.json
index 353cc8c21d..4fd7f6787a 100644
--- a/openapi/nexus.json
+++ b/openapi/nexus.json
@@ -5358,6 +5358,10 @@
               "$ref": "#/components/schemas/InstanceNetworkInterfaceAttachment"
             }
           ]
+        },
+        "user_data": {
+          "description": "User data for instance initialization systems (such as cloud-init). Must be a Base64-encoded string, as specified in RFC 4648 § 4 (+ and / characters with padding). Maximum 32 KiB unencoded data.",
+          "type": "string"
+        }
       },
       "required": [
diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index cfaf49594e..36e51a2408 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -665,6 +665,10 @@
       "description": "Describes the instance hardware.",
       "type": "object",
       "properties": {
+        "cloud_init_bytes": {
+          "nullable": true,
+          "type": "string"
+        },
         "disks": {
           "type": "array",
           "items": {
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs
index 1ebccfe3c9..4c9b8656b8 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -192,6 +192,7 @@ struct InstanceInner {
     // Disk related properties
     requested_disks: Vec<propolis_client::api::DiskRequest>,
+    cloud_init_bytes: Option<String>,
 
     // Internal State management
     state: InstanceStates,
@@ -298,7 +299,7 @@ impl InstanceInner {
             nics,
             disks: self.requested_disks.clone(),
             migrate,
-            cloud_init_bytes: None,
+            cloud_init_bytes: self.cloud_init_bytes.clone(),
         };
 
         info!(self.log, "Sending ensure request to propolis: {:?}", request);
@@ -430,6 +431,7 @@ impl Instance {
             vnic_allocator,
             requested_nics: initial.nics,
             requested_disks: initial.disks,
+            cloud_init_bytes: initial.cloud_init_bytes,
             vlan,
             state: InstanceStates::new(initial.runtime),
             running_state: None,
@@ -693,6 +695,7 @@ mod test {
             },
             nics: vec![],
             disks: vec![],
+            cloud_init_bytes: None,
         }
     }
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index 8cdcdf40ae..9a86b3ef62 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -243,6 +243,7 @@ mod test {
             },
             nics: vec![],
             disks: vec![],
+            cloud_init_bytes: None,
         }
     }
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 0ff062f00a..5fe493ecce 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -51,6 +51,7 @@ pub struct InstanceHardware {
     pub runtime: InstanceRuntimeState,
     pub nics: Vec<NetworkInterface>,
     pub disks: Vec<propolis_client::api::DiskRequest>,
+    pub cloud_init_bytes: Option<String>,
 }
 
 /// Sent to a sled agent to establish the runtime state of an Instance
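A closing note on discovery: cloud-init's NoCloud datasource finds this disk by its filesystem label, which is why `build_vfat` sets the volume label to `cidata`. A hypothetical test-style sketch (assuming access to the private `build_vfat` helper, e.g. from its own test module) checking that the label lands where FAT12/16 boot sectors store it:

```rust
// e.g. inside nexus/src/cidata.rs's test module
#[test]
fn volume_label_is_cidata() {
    // non-empty meta-data so at least one file is written
    let image = super::build_vfat(b"{}", b"").unwrap();
    // FAT12/16 extended BPB: 11-byte volume label at byte offset 43
    assert_eq!(&image[43..54], &b"cidata     "[..]);
}
```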