From 0e2972e46e02a0a4095fd8115ffd1f73d5bde01e Mon Sep 17 00:00:00 2001 From: tsturzl Date: Thu, 17 Jun 2021 21:22:54 -0600 Subject: [PATCH 01/70] time to test --- src/cgroups/v2/memory.rs | 122 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index a83f7391f..f4b7771ab 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -1,10 +1,16 @@ -use anyhow::Result; +use anyhow::{Result, *}; use std::path::Path; use oci_spec::{LinuxMemory, LinuxResources}; +use crate::cgroups::common; + use super::controller::Controller; +const CGROUP_MEMORY_SWAP: &str = "memory.swap.max"; +const CGROUP_MEMORY_MAX: &str = "memory.max"; +const CGROUP_MEMORY_LOW: &str = "memory.low"; + pub struct Memory {} impl Controller for Memory { @@ -18,7 +24,119 @@ impl Controller for Memory { } impl Memory { - fn apply(_: &Path, _: &LinuxMemory) -> Result<()> { + fn set>(path: P, val: i64) -> Result<()> { + if val == 0 { + Ok(()) + } else if val == -1 { + common::write_cgroup_file_str(path, "max") + } else { + common::write_cgroup_file(path, val) + } + } + + fn apply(path: &Path, memory: &LinuxMemory) -> Result<()> { + // if nothing is set just exit right away + if memory.reservation.is_none() && memory.limit.is_none() && memory.swap.is_none() { + return Ok(()); + } + + match memory.limit { + Some(limit) if limit < -1 => { + bail!("invalid memory value: {}", limit); + } + Some(limit) => match memory.swap { + Some(swap) if swap < -1 => { + bail!("invalid swap value: {}", swap); + } + Some(swap) => { + Memory::set(path.join(CGROUP_MEMORY_SWAP), swap)?; + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + None => { + if limit == -1 { + Memory::set(path.join(CGROUP_MEMORY_SWAP), -1)?; + } + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + }, + None => { + if memory.swap.is_some() { + bail!("unsable to set swap limit without memory limit"); + } + } + }; + + if let 
Some(reservation) = memory.reservation { + if reservation < -1 { + bail!("invalid memory reservation value: {}", reservation); + } + Memory::set(path.join(CGROUP_MEMORY_LOW), reservation)?; + } + Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::{create_temp_dir, set_fixture}; + use oci_spec::LinuxMemory; + use std::fs::read_to_string; + + #[test] + fn test_set_memory_v2() { + let tmp = create_temp_dir("test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let limit = 1024; + let reservation = 512; + let swap = 2048; + let memory_limits = &LinuxMemory { + limit: Some(limit), + reservation: Some(reservation), + swap: Some(swap), + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, limit.to_string()); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, swap.to_string()); + + let reservation_content = + read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation"); + assert_eq!(reservation_content, reservation.to_string()); + } + + #[test] + fn test_set_memory_unlimited_v2() { + let tmp = create_temp_dir("test_set_memory_unlimited_v2") + .expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-1), 
+ reservation: None, + swap: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, "max"); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, "max"); + } +} From c0cb4440272e7d0c5a5e550330fe2edf7f9baa06 Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Thu, 17 Jun 2021 22:21:36 -0600 Subject: [PATCH 02/70] hella tests --- src/cgroups/v2/memory.rs | 135 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index f4b7771ab..df00bbf49 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -61,7 +61,7 @@ impl Memory { }, None => { if memory.swap.is_some() { - bail!("unsable to set swap limit without memory limit"); + bail!("unable to set swap limit without memory limit"); } } }; @@ -139,4 +139,137 @@ mod tests { let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); assert_eq!(swap_content, "max"); } + + #[test] + fn test_err_swap_no_memory_v2() { + let tmp = + create_temp_dir("test_err_swap_no_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: None, + swap: Some(512), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_limit_v2() { + let tmp = create_temp_dir("test_err_bad_limit_v2").expect("create 
temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-2), + swap: None, + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_swap_v2() { + let tmp = create_temp_dir("test_err_bad_swap_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(512), + swap: Some(-3), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + quickcheck! 
{ + fn property_test_set_memory_v2(linux_memory: LinuxMemory) -> bool { + let tmp = create_temp_dir("property_test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let result = Memory::apply(&tmp, &linux_memory); + + // we need to check for expected errors first and foremost or we'll get false negatives + // later + if let Some(limit) = linux_memory.limit { + if limit < -1 { + return result.is_err(); + } + } + + if let Some(swap) = linux_memory.swap { + if swap < -1 { + return result.is_err(); + } + if linux_memory.limit.is_none() { + return result.is_err(); + } + } + + if let Some(reservation) = linux_memory.reservation { + if reservation < -1 { + return result.is_err(); + } + } + + // check the limit file is set as expected + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit to string"); + let limit_check = match linux_memory.limit { + Some(limit) if limit == -1 => limit_content == "max", + Some(limit) => limit_content == limit.to_string(), + None => limit_content == "0", + }; + + // check the swap file is set as expected + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit to string"); + let swap_check = match linux_memory.swap { + Some(swap) if swap == -1 => swap_content == "max", + Some(swap) => swap_content == swap.to_string(), + None => { + match linux_memory.limit { + Some(limit) if limit == -1 => swap_content == "max", + _ => swap_content == "0", + } + } + }; + + + // check the resevation file is set as expected + let reservation_content = read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation to string"); + let reservation_check = match linux_memory.reservation { + Some(reservation) if 
reservation == -1 => reservation_content == "max", + Some(reservation) => reservation_content == reservation.to_string(), + None => reservation_content == "0", + }; + + println!("limit_check: {}", limit_check); + println!("swap_check: {}", swap_check); + println!("reservation_check: {}", reservation_check); + limit_check && swap_check && reservation_check + } + } } From 57a080ef8ab4b27b373b35af58f82b1992ab6ba8 Mon Sep 17 00:00:00 2001 From: tsturzl Date: Thu, 17 Jun 2021 21:22:54 -0600 Subject: [PATCH 03/70] time to test --- src/cgroups/v2/memory.rs | 122 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index a83f7391f..f4b7771ab 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -1,10 +1,16 @@ -use anyhow::Result; +use anyhow::{Result, *}; use std::path::Path; use oci_spec::{LinuxMemory, LinuxResources}; +use crate::cgroups::common; + use super::controller::Controller; +const CGROUP_MEMORY_SWAP: &str = "memory.swap.max"; +const CGROUP_MEMORY_MAX: &str = "memory.max"; +const CGROUP_MEMORY_LOW: &str = "memory.low"; + pub struct Memory {} impl Controller for Memory { @@ -18,7 +24,119 @@ impl Controller for Memory { } impl Memory { - fn apply(_: &Path, _: &LinuxMemory) -> Result<()> { + fn set>(path: P, val: i64) -> Result<()> { + if val == 0 { + Ok(()) + } else if val == -1 { + common::write_cgroup_file_str(path, "max") + } else { + common::write_cgroup_file(path, val) + } + } + + fn apply(path: &Path, memory: &LinuxMemory) -> Result<()> { + // if nothing is set just exit right away + if memory.reservation.is_none() && memory.limit.is_none() && memory.swap.is_none() { + return Ok(()); + } + + match memory.limit { + Some(limit) if limit < -1 => { + bail!("invalid memory value: {}", limit); + } + Some(limit) => match memory.swap { + Some(swap) if swap < -1 => { + bail!("invalid swap value: {}", swap); + } + Some(swap) => { + 
Memory::set(path.join(CGROUP_MEMORY_SWAP), swap)?; + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + None => { + if limit == -1 { + Memory::set(path.join(CGROUP_MEMORY_SWAP), -1)?; + } + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + }, + None => { + if memory.swap.is_some() { + bail!("unsable to set swap limit without memory limit"); + } + } + }; + + if let Some(reservation) = memory.reservation { + if reservation < -1 { + bail!("invalid memory reservation value: {}", reservation); + } + Memory::set(path.join(CGROUP_MEMORY_LOW), reservation)?; + } + Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::{create_temp_dir, set_fixture}; + use oci_spec::LinuxMemory; + use std::fs::read_to_string; + + #[test] + fn test_set_memory_v2() { + let tmp = create_temp_dir("test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let limit = 1024; + let reservation = 512; + let swap = 2048; + let memory_limits = &LinuxMemory { + limit: Some(limit), + reservation: Some(reservation), + swap: Some(swap), + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, limit.to_string()); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, swap.to_string()); + + let reservation_content = + read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation"); + assert_eq!(reservation_content, reservation.to_string()); + } + + #[test] + fn test_set_memory_unlimited_v2() { + let tmp = 
create_temp_dir("test_set_memory_unlimited_v2") + .expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-1), + reservation: None, + swap: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, "max"); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, "max"); + } +} From f099a34d5e5b9578ec72d2fbf2344ec6410578b9 Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Thu, 17 Jun 2021 22:21:36 -0600 Subject: [PATCH 04/70] hella tests --- src/cgroups/v2/memory.rs | 135 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index f4b7771ab..df00bbf49 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -61,7 +61,7 @@ impl Memory { }, None => { if memory.swap.is_some() { - bail!("unsable to set swap limit without memory limit"); + bail!("unable to set swap limit without memory limit"); } } }; @@ -139,4 +139,137 @@ mod tests { let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); assert_eq!(swap_content, "max"); } + + #[test] + fn test_err_swap_no_memory_v2() { + let tmp = + create_temp_dir("test_err_swap_no_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, 
CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: None, + swap: Some(512), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_limit_v2() { + let tmp = create_temp_dir("test_err_bad_limit_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-2), + swap: None, + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_swap_v2() { + let tmp = create_temp_dir("test_err_bad_swap_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(512), + swap: Some(-3), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + quickcheck! 
{ + fn property_test_set_memory_v2(linux_memory: LinuxMemory) -> bool { + let tmp = create_temp_dir("property_test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let result = Memory::apply(&tmp, &linux_memory); + + // we need to check for expected errors first and foremost or we'll get false negatives + // later + if let Some(limit) = linux_memory.limit { + if limit < -1 { + return result.is_err(); + } + } + + if let Some(swap) = linux_memory.swap { + if swap < -1 { + return result.is_err(); + } + if linux_memory.limit.is_none() { + return result.is_err(); + } + } + + if let Some(reservation) = linux_memory.reservation { + if reservation < -1 { + return result.is_err(); + } + } + + // check the limit file is set as expected + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit to string"); + let limit_check = match linux_memory.limit { + Some(limit) if limit == -1 => limit_content == "max", + Some(limit) => limit_content == limit.to_string(), + None => limit_content == "0", + }; + + // check the swap file is set as expected + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit to string"); + let swap_check = match linux_memory.swap { + Some(swap) if swap == -1 => swap_content == "max", + Some(swap) => swap_content == swap.to_string(), + None => { + match linux_memory.limit { + Some(limit) if limit == -1 => swap_content == "max", + _ => swap_content == "0", + } + } + }; + + + // check the resevation file is set as expected + let reservation_content = read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation to string"); + let reservation_check = match linux_memory.reservation { + Some(reservation) if 
reservation == -1 => reservation_content == "max", + Some(reservation) => reservation_content == reservation.to_string(), + None => reservation_content == "0", + }; + + println!("limit_check: {}", limit_check); + println!("swap_check: {}", swap_check); + println!("reservation_check: {}", reservation_check); + limit_check && swap_check && reservation_check + } + } } From 6a38a5d0893d730c3ee9ea5870674814917ac124 Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Thu, 17 Jun 2021 09:01:30 +0000 Subject: [PATCH 05/70] Add cgroup v1 freezer controller --- oci_spec/src/lib.rs | 8 + src/cgroups/v1/controller_type.rs | 3 + src/cgroups/v1/freezer.rs | 248 ++++++++++++++++++++++++++++++ src/cgroups/v1/manager.rs | 6 +- src/cgroups/v1/memory.rs | 1 + src/cgroups/v1/mod.rs | 1 + 6 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 src/cgroups/v1/freezer.rs diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 4fbc56371..f629d2b83 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -460,6 +460,7 @@ pub struct LinuxResources { #[serde(default)] pub hugepage_limits: Vec, pub network: Option, + pub freezer: Option, } #[derive(Serialize, Deserialize, Debug, Clone, Copy)] @@ -556,6 +557,13 @@ pub enum LinuxSeccompOperator { ScmpCmpMaskedEq = 7, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +pub enum FreezerState { + Undefined, + Frozen, + Thawed, +} + #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct Linux { diff --git a/src/cgroups/v1/controller_type.rs b/src/cgroups/v1/controller_type.rs index 68cb25572..449c1bf41 100644 --- a/src/cgroups/v1/controller_type.rs +++ b/src/cgroups/v1/controller_type.rs @@ -11,6 +11,7 @@ pub enum ControllerType { Blkio, NetworkPriority, NetworkClassifier, + Freezer, } impl Display for ControllerType { @@ -26,6 +27,7 @@ impl Display for ControllerType { Self::Blkio => "blkio", Self::NetworkPriority => "net_prio", Self::NetworkClassifier => "net_cls", + Self::Freezer 
=> "freezer", }; write!(f, "{}", print) @@ -43,4 +45,5 @@ pub const CONTROLLERS: &[ControllerType] = &[ ControllerType::Blkio, ControllerType::NetworkPriority, ControllerType::NetworkClassifier, + ControllerType::Freezer, ]; diff --git a/src/cgroups/v1/freezer.rs b/src/cgroups/v1/freezer.rs new file mode 100644 index 000000000..d5fea3cb7 --- /dev/null +++ b/src/cgroups/v1/freezer.rs @@ -0,0 +1,248 @@ +use std::io::prelude::*; +use std::{ + fs::{create_dir_all, OpenOptions}, + path::Path, + thread, time, +}; + +use anyhow::{Result, *}; +use nix::unistd::Pid; + +use crate::cgroups::common::{self, CGROUP_PROCS}; +use crate::cgroups::v1::Controller; +use oci_spec::{FreezerState, LinuxResources}; + +const CGROUP_FREEZER_STATE: &str = "freezer.state"; +const FREEZER_STATE_THAWED: &str = "THAWED"; +const FREEZER_STATE_FROZEN: &str = "FROZEN"; +const FREEZER_STATE_FREEZING: &str = "FREEZING"; + +pub struct Freezer {} + +impl Controller for Freezer { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + log::debug!("Apply Freezer cgroup config"); + create_dir_all(&cgroup_root)?; + + if let Some(freezer_state) = linux_resources.freezer { + Self::apply(freezer_state, cgroup_root)?; + } + + common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; + Ok(()) + } +} + +impl Freezer { + fn apply(freezer_state: FreezerState, cgroup_root: &Path) -> Result<()> { + match freezer_state { + FreezerState::Undefined => {} + FreezerState::Thawed => { + common::write_cgroup_file( + cgroup_root.join(CGROUP_FREEZER_STATE), + FREEZER_STATE_THAWED, + )?; + } + FreezerState::Frozen => { + let r = || -> Result<()> { + // We should do our best to retry if FREEZING is seen until it becomes FROZEN. + // Add sleep between retries occasionally helped when system is extremely slow. 
+ // see: + // https://github.com/opencontainers/runc/blob/b9ee9c6314599f1b4a7f497e1f1f856fe433d3b7/libcontainer/cgroups/fs/freezer.go#L42 + for i in 0..1000 { + if i % 50 == 49 { + let _ = common::write_cgroup_file( + cgroup_root.join(CGROUP_FREEZER_STATE), + FREEZER_STATE_THAWED, + ); + thread::sleep(time::Duration::from_millis(10)); + } + + common::write_cgroup_file( + cgroup_root.join(CGROUP_FREEZER_STATE), + FREEZER_STATE_FROZEN, + )?; + + if i % 25 == 24 { + thread::sleep(time::Duration::from_millis(10)); + } + + let r = Self::read_freezer_state(cgroup_root)?; + match r.trim() { + FREEZER_STATE_FREEZING => { + continue; + } + FREEZER_STATE_FROZEN => { + if i > 1 { + log::debug!("frozen after {} retries", i) + } + return Ok(()); + } + _ => { + // should not reach here. + bail!("unexpected state {} while freezing", r.trim()); + } + } + } + bail!("unbale to freeze"); + }(); + + if r.is_err() { + // Freezing failed, and it is bad and dangerous to leave the cgroup in FROZEN or + // FREEZING, so try to thaw it back. + let _ = common::write_cgroup_file( + cgroup_root.join(CGROUP_FREEZER_STATE), + FREEZER_STATE_THAWED, + ); + } + return r; + } + } + Ok(()) + } + + fn read_freezer_state(cgroup_root: &Path) -> Result { + let path = cgroup_root.join(CGROUP_FREEZER_STATE); + let mut content = String::new(); + OpenOptions::new() + .create(false) + .read(true) + .open(path)? + .read_to_string(&mut content)?; + Ok(content) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::{create_temp_dir, set_fixture}; + use oci_spec::FreezerState; + + #[test] + fn test_set_freezer_state() { + let tmp = + create_temp_dir("test_set_freezer_state").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_FREEZER_STATE, "").expect("Set fixure for freezer state"); + + // set Frozen state. 
+ { + let freezer_state = FreezerState::Frozen; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(FREEZER_STATE_FROZEN, state_content); + } + + // set Thawed state. + { + let freezer_state = FreezerState::Thawed; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(FREEZER_STATE_THAWED, state_content); + } + + // set Undefined state. + { + let old_state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + let freezer_state = FreezerState::Undefined; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(old_state_content, state_content); + } + } + + #[test] + fn test_apply() { + let tmp = + create_temp_dir("test_set_freezer_state").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_FREEZER_STATE, "").expect("Set fixure for freezer state"); + set_fixture(&tmp, CGROUP_PROCS, "").expect("set fixture for proc file"); + + // set Thawed state. 
+ { + let linux_resources = LinuxResources { + devices: vec![], + disable_oom_killer: false, + oom_score_adj: None, + memory: None, + cpu: None, + pids: None, + block_io: None, + hugepage_limits: vec![], + network: None, + freezer: Some(FreezerState::Thawed), + }; + + let pid = Pid::from_raw(1000); + let _ = + ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(FREEZER_STATE_THAWED, state_content); + let pid_content = + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + assert_eq!(pid_content, "1000"); + } + + // set Frozen state. + { + let linux_resources = LinuxResources { + devices: vec![], + disable_oom_killer: false, + oom_score_adj: None, + memory: None, + cpu: None, + pids: None, + block_io: None, + hugepage_limits: vec![], + network: None, + freezer: Some(FreezerState::Frozen), + }; + + let pid = Pid::from_raw(1001); + let _ = + ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(FREEZER_STATE_FROZEN, state_content); + let pid_content = + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + assert_eq!(pid_content, "1001"); + } + + // set Undefined state. 
+ { + let linux_resources = LinuxResources { + devices: vec![], + disable_oom_killer: false, + oom_score_adj: None, + memory: None, + cpu: None, + pids: None, + block_io: None, + hugepage_limits: vec![], + network: None, + freezer: Some(FreezerState::Undefined), + }; + + let old_state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + let pid = Pid::from_raw(1002); + let _ = + ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + assert_eq!(old_state_content, state_content); + let pid_content = + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + assert_eq!(pid_content, "1002"); + } + } +} diff --git a/src/cgroups/v1/manager.rs b/src/cgroups/v1/manager.rs index fd5d7a2ea..e7814d372 100644 --- a/src/cgroups/v1/manager.rs +++ b/src/cgroups/v1/manager.rs @@ -9,8 +9,9 @@ use procfs::process::Process; use super::{ blkio::Blkio, controller_type::CONTROLLERS, cpu::Cpu, cpuacct::CpuAcct, cpuset::CpuSet, - devices::Devices, hugetlb::Hugetlb, memory::Memory, network_classifier::NetworkClassifier, - network_priority::NetworkPriority, pids::Pids, util, Controller, + devices::Devices, freezer::Freezer, hugetlb::Hugetlb, memory::Memory, + network_classifier::NetworkClassifier, network_priority::NetworkPriority, pids::Pids, util, + Controller, }; use crate::cgroups::common::CGROUP_PROCS; @@ -70,6 +71,7 @@ impl CgroupManager for Manager { "blkio" => Blkio::apply(linux_resources, &subsys.1, pid)?, "net_prio" => NetworkPriority::apply(linux_resources, &subsys.1, pid)?, "net_cls" => NetworkClassifier::apply(linux_resources, &subsys.1, pid)?, + "freezer" => Freezer::apply(linux_resources, &subsys.1, pid)?, _ => unreachable!("every subsystem should have an associated controller"), } } diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index c60409820..9612f81dd 100644 --- 
a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -364,6 +364,7 @@ mod tests { block_io: None, hugepage_limits: vec![], network: None, + freezer: None, }; let pid = Pid::from_raw(pid_int); diff --git a/src/cgroups/v1/mod.rs b/src/cgroups/v1/mod.rs index 9816dc9f5..ff1855143 100644 --- a/src/cgroups/v1/mod.rs +++ b/src/cgroups/v1/mod.rs @@ -5,6 +5,7 @@ mod cpu; mod cpuacct; mod cpuset; mod devices; +mod freezer; mod hugetlb; pub mod manager; mod memory; From 25bd08b9e45d54cf11366797c8f719d6b141f69a Mon Sep 17 00:00:00 2001 From: Yutaka Juba Date: Fri, 18 Jun 2021 18:10:53 +0900 Subject: [PATCH 06/70] Add a test for applying CpuAcct. --- src/cgroups/v1/cpuacct.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/cgroups/v1/cpuacct.rs b/src/cgroups/v1/cpuacct.rs index 3060aebcf..947599cb2 100644 --- a/src/cgroups/v1/cpuacct.rs +++ b/src/cgroups/v1/cpuacct.rs @@ -19,3 +19,22 @@ impl Controller for CpuAcct { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::setup; + + #[test] + fn test_apply() { + let (tmp, procs) = setup("test_cpuacct_apply", CGROUP_PROCS); + let resource = LinuxResources::default(); + let pid = Pid::from_raw(1000); + + CpuAcct::apply(&resource, &tmp, pid).expect("apply cpuacct"); + + let content = fs::read_to_string(&procs) + .unwrap_or_else(|_| panic!("read {} file content", CGROUP_PROCS)); + assert_eq!(content, "1000"); + } +} From 5714fc4a4f553d4038698e21dc01d2660c080e42 Mon Sep 17 00:00:00 2001 From: utam0k Date: Fri, 18 Jun 2021 22:49:40 +0900 Subject: [PATCH 07/70] improve build time in CI --- .github/workflows/main.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 68e805cd2..0e4c4a9b9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,6 +11,15 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 + - uses: actions/cache@v2 + with: + path: | + 
~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - run: rustup component add clippy - uses: actions-rs/clippy-check@v1 with: @@ -27,6 +36,15 @@ jobs: - uses: actions/checkout@v2 with: submodules: recursive + - uses: actions/cache@v2 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - uses: actions-rs/toolchain@v1 with: toolchain: stable From 181518b867eab1792a8f6d984890672247bed8d2 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:29:07 +0200 Subject: [PATCH 08/70] Check if rootless container is required and ensure prerequisites --- src/lib.rs | 1 + src/rootless.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 src/rootless.rs diff --git a/src/lib.rs b/src/lib.rs index 98be65394..b49d6fcd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,3 +18,4 @@ pub mod start; pub mod stdio; pub mod tty; pub mod utils; +pub mod rootless; diff --git a/src/rootless.rs b/src/rootless.rs new file mode 100644 index 000000000..e22b71890 --- /dev/null +++ b/src/rootless.rs @@ -0,0 +1,66 @@ +use anyhow::{bail, Result}; +use nix::sched::CloneFlags; +use oci_spec::{LinuxIdMapping, Mount, Spec}; + +use crate::namespaces::Namespaces; + +/// Checks if rootless mode should be used +pub fn should_use_rootless() -> Result { + if !nix::unistd::geteuid().is_root() { + return Ok(true); + } + + if let Ok("true") = std::env::var("YOUKI_USE_ROOTLESS").as_deref() { + return Ok(true); + } + + Ok(false) +} + +/// Validates that the spec contains the required information for +/// running in rootless mode +pub fn validate(spec: &Spec) -> Result<()> { + let linux = spec.linux.as_ref().unwrap(); + + if linux.uid_mappings.is_empty() { + bail!("rootless 
containers require at least one uid mapping"); + } + + if linux.gid_mappings.is_empty() { + bail!("rootless containers require at least one gid mapping") + } + + let namespaces: Namespaces = linux.namespaces.clone().into(); + if !namespaces.clone_flags.contains(CloneFlags::CLONE_NEWUSER) { + bail!("rootless containers require the specification of a user namespace"); + } + + validate_mounts(&spec.mounts, &linux.uid_mappings, &linux.gid_mappings)?; + + Ok(()) +} + +fn validate_mounts( + mounts: &Vec, + uid_mappings: &Vec, + gid_mappings: &Vec, +) -> Result<()> { + for mount in mounts { + for opt in &mount.options { + if opt.starts_with("uid=") && !is_id_mapped(&opt[4..], uid_mappings)? { + bail!("Mount {:?} specifies option {} which is not mapped inside the rootless container", mount, opt); + } else if opt.starts_with("gid=") && !is_id_mapped(&opt[4..], gid_mappings)? { + bail!("Mount {:?} specifies option {} which is not mapped inside the rootless container", mount, opt); + } + } + } + + Ok(()) +} + +fn is_id_mapped(id: &str, mappings: &Vec) -> Result { + let id = id.parse::()?; + Ok(mappings + .iter() + .all(|m| id >= m.container_id && id <= m.container_id + m.size)) +} \ No newline at end of file From 16687c424d7db10b8648b264f204a3145f66ebde Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 15 Jun 2021 17:50:20 +0200 Subject: [PATCH 09/70] Ensure map binaries are available --- src/create.rs | 17 ++++++++++++++- src/rootless.rs | 55 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/create.rs b/src/create.rs index 3c1b19e1d..00cff2af0 100644 --- a/src/create.rs +++ b/src/create.rs @@ -9,7 +9,8 @@ use nix::sched; use nix::unistd; use nix::unistd::{Gid, Uid}; -use crate::cgroups; +use crate::rootless::{Rootless, lookup_map_binaries, should_use_rootless}; +use crate::{cgroups, rootless}; use crate::container::{Container, ContainerStatus}; use 
crate::namespaces::Namespaces; use crate::notify_socket::NotifyListener; @@ -131,6 +132,20 @@ fn run_container>( let linux = spec.linux.as_ref().unwrap(); let namespaces: Namespaces = linux.namespaces.clone().into(); + let rootless = if let Ok(true) = should_use_rootless() { + log::debug!("rootless container should be created"); + rootless::validate(&spec)?; + let mut rootless = Rootless::from(linux); + if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? { + rootless.newuidmap = Some(uid_binary); + rootless.newgidmap = Some(gid_binary); + } + Some(rootless) + } else { + None + }; + + let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id()); let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path)?; diff --git a/src/rootless.rs b/src/rootless.rs index e22b71890..06e48e95d 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -1,9 +1,34 @@ +use std::{env, path::PathBuf}; + use anyhow::{bail, Result}; use nix::sched::CloneFlags; -use oci_spec::{LinuxIdMapping, Mount, Spec}; +use oci_spec::{Linux, LinuxIdMapping, Mount, Spec}; use crate::namespaces::Namespaces; +#[derive(Debug, Clone)] +pub struct Rootless { + /// Location of the newuidmap binary + pub newuidmap: Option, + /// Location of the newgidmap binary + pub newgidmap: Option, + /// Mappings for user ids + pub uid_mappings: Vec, + /// Mappings for group ids + pub gid_mappings: Vec, +} + +impl From<&Linux> for Rootless { + fn from(linux: &Linux) -> Self { + Self { + newuidmap: None, + newgidmap: None, + uid_mappings: linux.uid_mappings.clone(), + gid_mappings: linux.uid_mappings.clone(), + } + } +} + /// Checks if rootless mode should be used pub fn should_use_rootless() -> Result { if !nix::unistd::geteuid().is_root() { @@ -63,4 +88,32 @@ fn is_id_mapped(id: &str, mappings: &Vec) -> Result { Ok(mappings .iter() .all(|m| id >= m.container_id && id <= m.container_id + m.size)) +} + +/// Looks up the location of the newuidmap and newgidmap binaries which +/// are 
required to write multiple user/group mappings +pub fn lookup_map_binaries(spec: &Linux) -> Result> { + if spec.uid_mappings.len() == 1 && spec.uid_mappings.len() == 1 { + return Ok(None); + } + + let uidmap = lookup_map_binary("newuidmap")?; + let gidmap = lookup_map_binary("newgidmap")?; + + match (uidmap, gidmap) { + (Some(newuidmap), Some(newgidmap)) => Ok(Some((newuidmap, newgidmap))), + _ => bail!("newuidmap/newgidmap binaries could not be found in path. This is required if multiple id mappings are specified"), + } +} + +fn lookup_map_binary(binary: &str) -> Result> { + let paths = env::var("PATH")?; + for p in paths.split_terminator(':') { + let binary_path = PathBuf::from(p).join(binary); + if binary_path.exists() { + return Ok(Some(binary_path)); + } + } + + Ok(None) } \ No newline at end of file From 5f36c6e0b5d8de6e0c22d8718ab35c1d32aa9600 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 15 Jun 2021 22:38:51 +0200 Subject: [PATCH 10/70] Implement protocol for identifier mapping --- src/create.rs | 4 +- src/process/child.rs | 31 ++--- src/process/fork.rs | 32 +++--- src/process/message.rs | 5 + src/process/mod.rs | 2 + src/process/parent.rs | 253 ++++++++++++++++++++++++++++++++++------- src/utils.rs | 7 ++ 7 files changed, 255 insertions(+), 79 deletions(-) diff --git a/src/create.rs b/src/create.rs index 00cff2af0..84360af38 100644 --- a/src/create.rs +++ b/src/create.rs @@ -152,9 +152,7 @@ fn run_container>( // first fork, which creates process, which will later create actual container process match fork::fork_first( pid_file, - namespaces - .clone_flags - .contains(sched::CloneFlags::CLONE_NEWUSER), + rootless, linux, &container, cmanager, diff --git a/src/process/child.rs b/src/process/child.rs index 2bdca4931..65db39921 100644 --- a/src/process/child.rs +++ b/src/process/child.rs @@ -1,6 +1,5 @@ use std::io::ErrorKind; use std::io::Read; -use std::io::Write; use anyhow::{bail, Result}; use 
mio::unix::pipe; @@ -9,6 +8,7 @@ use mio::unix::pipe::Sender; use mio::{Events, Interest, Poll, Token}; use nix::unistd::Pid; +use super::parent::ParentChannel; use super::{MAX_EVENTS, WAIT_FOR_INIT}; use crate::process::message::Message; @@ -18,7 +18,7 @@ const CHILD: Token = Token(1); /// Contains sending end of pipe for parent process, receiving end of pipe /// for the init process and poller for that pub struct ChildProcess { - sender_for_parent: Sender, + parent_channel: ParentChannel, receiver: Option, poll: Option, } @@ -29,9 +29,9 @@ pub struct ChildProcess { // a process point of view, init process is child of child process, which is child of original youki process. impl ChildProcess { /// create a new Child process structure - pub fn new(sender_for_parent: Sender) -> Result { + pub fn new(parent_channel: ParentChannel) -> Result { Ok(Self { - sender_for_parent, + parent_channel, receiver: None, poll: None, }) @@ -55,24 +55,17 @@ impl ChildProcess { /// Indicate that child process has forked the init process to parent process pub fn notify_parent(&mut self, init_pid: Pid) -> Result<()> { - log::debug!( - "child send to parent {:?}", - (Message::ChildReady as u8).to_be_bytes() - ); - // write ChildReady message to the pipe to parent - self.write_message_for_parent(Message::ChildReady)?; - // write pid of init process which is forked by child process to the pipe, - // Pid in nix::unistd is type alias of SessionId which itself is alias of i32 - self.sender_for_parent - .write_all(&(init_pid.as_raw()).to_be_bytes())?; + self.parent_channel.send_init_pid(init_pid)?; + Ok(()) + } + + pub fn request_identifier_mapping(&mut self) -> Result<()> { + self.parent_channel.request_identifier_mapping()?; Ok(()) } - /// writes given message to pipe for the parent - #[inline] - fn write_message_for_parent(&mut self, msg: Message) -> Result<()> { - self.sender_for_parent - .write_all(&(msg as u8).to_be_bytes())?; + pub fn wait_for_mapping_ack(&mut self) -> Result<()> { + 
self.parent_channel.wait_for_mapping_ack()?; Ok(()) } diff --git a/src/process/fork.rs b/src/process/fork.rs index 9689dacff..b65e3afd4 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -16,23 +16,22 @@ use nix::unistd::Pid; use crate::cgroups::common::CgroupManager; use crate::container::ContainerStatus; use crate::process::{child, init, parent, Process}; -use crate::{container::Container, pipe::Pipe}; +use crate::rootless::Rootless; +use crate::{container::Container}; /// Function to perform the first fork for in order to run the container process pub fn fork_first>( pid_file: Option

, - is_userns: bool, + rootless: Option, linux: &oci_spec::Linux, container: &Container, cmanager: Box, ) -> Result { - // create a new pipe - let cpipe = Pipe::new()?; - // create new parent process structure - let (mut parent, sender_for_parent) = parent::ParentProcess::new()?; + let (mut parent, parent_channel) = parent::ParentProcess::new(rootless.clone())?; // create a new child process structure with sending end of parent process - let child = child::ChildProcess::new(sender_for_parent)?; + let mut child = child::ChildProcess::new(parent_channel)?; + // fork the process match unsafe { unistd::fork()? } { @@ -51,21 +50,28 @@ pub fn fork_first>( // if new user is specified in specification, this will be true // and new namespace will be created, check https://man7.org/linux/man-pages/man7/user_namespaces.7.html // for more information - if is_userns { + if rootless.is_some() { + log::debug!("creating new user namespace"); sched::unshare(sched::CloneFlags::CLONE_NEWUSER)?; + + // child needs to be dumpable, otherwise the non root parent is not + // allowed to write the uid/gid maps + prctl::set_dumpable(true).unwrap(); + child.request_identifier_mapping()?; + child.wait_for_mapping_ack()?; + prctl::set_dumpable(false).unwrap(); } - cpipe.notify()?; Ok(Process::Child(child)) } // in the parent process unistd::ForkResult::Parent { child } => { - cpipe.wait()?; - // wait for child to fork init process and report back its pid - let init_pid = parent.wait_for_child_ready()?; + let init_pid = parent.wait_for_child_ready(child)?; log::debug!("init pid is {:?}", init_pid); - cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?; + if rootless.is_none() { + cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?; + } // update status and pid of the container process container diff --git a/src/process/message.rs b/src/process/message.rs index fddf09ab9..7e71373e1 100644 --- a/src/process/message.rs +++ b/src/process/message.rs 
@@ -3,6 +3,8 @@ pub enum Message { ChildReady = 0x00, InitReady = 0x01, + WriteMapping = 0x02, + MappingWritten = 0x03, } impl From for Message { @@ -10,7 +12,10 @@ impl From for Message { match from { 0x00 => Message::ChildReady, 0x01 => Message::InitReady, + 0x02 => Message::WriteMapping, + 0x03 => Message::MappingWritten, _ => panic!("unknown message."), } } } + diff --git a/src/process/mod.rs b/src/process/mod.rs index c64fea8bf..99ff334e8 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -26,3 +26,5 @@ const MAX_EVENTS: usize = 128; const WAIT_FOR_CHILD: Duration = Duration::from_secs(5); /// Time to wait when polling for message from init process const WAIT_FOR_INIT: Duration = Duration::from_millis(1000); +/// Time to wait when polling for mapping ack from parent +const WAIT_FOR_MAPPING: Duration = Duration::from_secs(3); diff --git a/src/process/parent.rs b/src/process/parent.rs index 3ff941794..80feaa2b8 100644 --- a/src/process/parent.rs +++ b/src/process/parent.rs @@ -1,89 +1,254 @@ use std::io::ErrorKind; use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::process::Command; use super::{MAX_EVENTS, WAIT_FOR_CHILD}; +use crate::process::WAIT_FOR_MAPPING; use crate::process::message::Message; +use crate::rootless::Rootless; +use crate::utils; +use anyhow::Context; use anyhow::{bail, Result}; use mio::unix::pipe; use mio::unix::pipe::{Receiver, Sender}; use mio::{Events, Interest, Poll, Token}; +use nix::unistd::Pid; +use oci_spec::LinuxIdMapping; // Token is used to identify which socket generated an event const PARENT: Token = Token(0); /// Contains receiving end of pipe to child process and a poller for that. 
pub struct ParentProcess { - receiver: Receiver, - poll: Poll, + child_channel: ChildChannel, } // Poll is used to register and listen for various events // by registering it with an event source such as receiving end of a pipe impl ParentProcess { /// Create new Parent process structure - pub fn new() -> Result<(Self, Sender)> { - // create a new pipe - let (sender, mut receiver) = pipe::new()?; - // create a new poll, and register the receiving end of pipe to it - // This will poll for the read events, so when data is written to sending end of the pipe, - // the receiving end will be readable and poll wil notify + pub fn new(rootless: Option) -> Result<(Self, ParentChannel)> { + let (parent_channel, child_channel) = Self::setup_pipes(rootless)?; + let parent = Self { + child_channel, + }; + + Ok((parent, parent_channel)) + } + + fn setup_pipes(rootless: Option) -> Result<(ParentChannel, ChildChannel)> { + let (send_to_parent, receive_from_child) = pipe::new()?; + let (send_to_child, receive_from_parent) = pipe::new()?; + + let parent_channel = ParentChannel::new(send_to_parent, receive_from_parent)?; + let child_channel = ChildChannel::new(send_to_child, receive_from_child, rootless)?; + + Ok((parent_channel, child_channel)) + } + + /// Waits for associated child process to send ready message + /// and return the pid of init process which is forked by child process + pub fn wait_for_child_ready(&mut self, child_pid: Pid) -> Result { + let init_pid = self.child_channel.wait_for_child_ready(child_pid)?; + Ok(init_pid) + } +} + +// Channel for communicating with the parent +pub struct ParentChannel { + sender: Sender, + receiver: Receiver, + poll: Poll, +} + +impl ParentChannel { + fn new(sender: Sender, mut receiver: Receiver) -> Result { let poll = Poll::new()?; poll.registry() .register(&mut receiver, PARENT, Interest::READABLE)?; - Ok((Self { receiver, poll }, sender)) + Ok(Self { + sender, + receiver, + poll, + }) } - /// Waits for associated child process to 
send ready message - /// and return the pid of init process which is forked by child process - pub fn wait_for_child_ready(&mut self) -> Result { - // Create collection with capacity to store up to MAX_EVENTS events + pub fn send_init_pid(&mut self, pid: Pid) -> Result<()> { + // write ChildReady message to the pipe to parent + log::debug!("[child to parent] sending init pid ({:?})", pid); + self.write_message(Message::ChildReady)?; + // write pid of init process which is forked by child process to the pipe, + // Pid in nix::unistd is type alias of SessionId which itself is alias of i32 + self.sender.write_all(&(pid.as_raw()).to_be_bytes())?; + Ok(()) + } + + // requests the parent to write the id mappings for the child process + // this needs to be done from the parent see https://man7.org/linux/man-pages/man7/user_namespaces.7.html + pub fn request_identifier_mapping(&mut self) -> Result<()> { + log::debug!("[child to parent] request identifier mapping"); + self.write_message(Message::WriteMapping)?; + Ok(()) + } + + // wait until the parent process has finished writing the id mappings + pub fn wait_for_mapping_ack(&mut self) -> Result<()> { let mut events = Events::with_capacity(MAX_EVENTS); + log::debug!("waiting for ack from parent"); - // poll the receiving end of pipe created for WAIT_FOR_CHILD duration for an event - self.poll.poll(&mut events, Some(WAIT_FOR_CHILD))?; + self.poll.poll(&mut events, Some(WAIT_FOR_MAPPING))?; for event in events.iter() { - // check if the event token in PARENT - // note that this does not assign anything to PARENT, but instead compares PARENT and event.token() - // check http://patshaughnessy.net/2018/1/18/learning-rust-if-let-vs--match for a bit more detailed explanation - if let PARENT = event.token() { - // read data from pipe + if event.token() == PARENT { let mut buf = [0; 1]; match self.receiver.read_exact(&mut buf) { - // This error simply means that there are no more incoming connections waiting to be accepted at this 
point. Err(ref e) if e.kind() == ErrorKind::WouldBlock => (), Err(e) => bail!( "Failed to receive a message from the child process. {:?}", e ), _ => (), - }; - // convert to Message wrapper + } + match Message::from(u8::from_be_bytes(buf)) { - Message::ChildReady => { - // read pid of init process forked by child, 4 bytes as the type is i32 - let mut buf = [0; 4]; - match self.receiver.read_exact(&mut buf) { - // This error simply means that there are no more incoming connections waiting to be accepted at this point. - Err(ref e) if e.kind() == ErrorKind::WouldBlock => (), - Err(e) => bail!( - "Failed to receive a message from the child process. {:?}", - e - ), - _ => (), + Message::MappingWritten => return Ok(()), + msg => bail!("receive unexpected message {:?} in child process", msg), + } + } + } + unreachable!("timed out waiting for mapping ack from parent") + } + + #[inline] + fn write_message(&mut self, msg: Message) -> Result<()> { + self.sender.write_all(&(msg as u8).to_be_bytes())?; + Ok(()) + } +} + +struct ChildChannel { + sender: Sender, + receiver: Receiver, + poll: Poll, + rootless: Option +} + +impl ChildChannel { + fn new(sender: Sender, mut receiver: Receiver, rootless: Option) -> Result { + let poll = Poll::new()?; + poll.registry() + .register(&mut receiver, PARENT, Interest::READABLE)?; + Ok(Self { + sender, + receiver, + poll, + rootless, + }) + } + + /// Waits for associated child process to send ready message + /// and return the pid of init process which is forked by child process + pub fn wait_for_child_ready(&mut self, child_pid: Pid) -> Result { + // Create collection with capacity to store up to MAX_EVENTS events + let mut events = Events::with_capacity(MAX_EVENTS); + loop { + // poll the receiving end of pipe created for WAIT_FOR_CHILD duration for an event + self.poll.poll(&mut events, Some(WAIT_FOR_CHILD))?; + for event in events.iter() { + // check if the event token in PARENT + // note that this does not assign anything to PARENT, 
but instead compares PARENT and event.token() + // check http://patshaughnessy.net/2018/1/18/learning-rust-if-let-vs--match for a bit more detailed explanation + if let PARENT = event.token() { + // read data from pipe + let mut buf = [0; 1]; + match self.receiver.read_exact(&mut buf) { + // This error simply means that there are no more incoming connections waiting to be accepted at this point. + Err(ref e) if e.kind() == ErrorKind::WouldBlock => { + break; } - return Ok(i32::from_be_bytes(buf)); + Err(e) => bail!( + "Failed to receive a message from the child process. {:?}", + e + ), + _ => (), + }; + // convert to Message wrapper + match Message::from(u8::from_be_bytes(buf)) { + Message::ChildReady => { + log::debug!("received child ready message"); + // read pid of init process forked by child, 4 bytes as the type is i32 + let mut buf = [0; 4]; + match self.receiver.read_exact(&mut buf) { + // This error simply means that there are no more incoming connections waiting to be accepted at this point. + Err(ref e) if e.kind() == ErrorKind::WouldBlock => (), + Err(e) => bail!( + "Failed to receive a message from the child process. 
{:?}", + e + ), + _ => (), + } + return Ok(i32::from_be_bytes(buf)); + } + Message::WriteMapping => { + log::debug!("write mapping for pid {:?}", child_pid); + utils::write_file(format!("/proc/{}/setgroups", child_pid), "deny")?; + self.write_uid_mapping(child_pid)?; + self.write_gid_mapping(child_pid)?; + self.notify_mapping_written()?; + } + msg => bail!("receive unexpected message {:?} in parent process", msg), } - msg => bail!("receive unexpected message {:?} in parent process", msg), + } else { + // as the poll is registered with only parent token + unreachable!() } - } else { - // as the poll is registered with only parent token - unreachable!() } } - // should not reach here, as there should be a ready event from child within WAIT_FOR_CHILD duration - unreachable!( - "No message received from child process within {} seconds", - WAIT_FOR_CHILD.as_secs() - ); + } + + fn notify_mapping_written(&mut self) -> Result<()> { + self.sender + .write_all(&(Message::MappingWritten as u8).to_be_bytes())?; + Ok(()) + } + + fn write_uid_mapping(&self, target_pid: Pid) -> Result<()> { + let rootless = self.rootless.as_ref().unwrap(); + write_id_mapping( + &format!("/proc/{}/uid_map", target_pid), + &rootless.uid_mappings, + rootless.newuidmap.as_deref(), + ) + } + + fn write_gid_mapping(&self, target_pid: Pid) -> Result<()> { + let rootless = self.rootless.as_ref().unwrap(); + write_id_mapping( + &format!("/proc/{}/gid_map", target_pid), + &rootless.gid_mappings, + rootless.newgidmap.as_deref(), + ) } } + +fn write_id_mapping( + map_file: &str, + mappings: &Vec, + map_binary: Option<&Path>, +) -> Result<()> { + let mappings: Vec = mappings + .iter() + .map(|m| format!("{} {} {}", m.container_id, m.host_id, m.size)) + .collect(); + if mappings.len() == 1 { + utils::write_file(map_file, mappings.first().unwrap())?; + } else { + Command::new(map_binary.unwrap()) + .args(mappings) + .output() + .with_context(|| format!("failed to execute {:?}", map_binary))?; + } + + Ok(()) +} 
\ No newline at end of file diff --git a/src/utils.rs b/src/utils.rs index 178b3d054..441103dde 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -6,6 +6,7 @@ use std::fs; use std::path::{Path, PathBuf}; use std::time::Duration; +use anyhow::Context; use anyhow::{bail, Result}; use nix::unistd; @@ -88,6 +89,12 @@ pub fn delete_with_retry>(path: P) -> Result<()> { bail!("could not delete {:?}", path) } +pub fn write_file, C: AsRef<[u8]>>(path: P, contents: C) -> Result<()> { + let path = path.as_ref(); + fs::write(path, contents).with_context(|| format!("failed to write to {:?}", path))?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; From f9b722939455b12cad46f2bf4c69d45855b1bc6e Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 15 Jun 2021 23:54:28 +0200 Subject: [PATCH 11/70] Ensure root directory can be written by non root user --- src/main.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 04ee7102e..ef84be87e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ use nix::sys::signal as nix_signal; use youki::command::linux::LinuxCommand; use youki::container::{Container, ContainerStatus}; use youki::create; +use youki::rootless::should_use_rootless; use youki::signal; use youki::start; @@ -79,7 +80,11 @@ fn main() -> Result<()> { eprintln!("log init failed: {:?}", e); } - let root_path = PathBuf::from(&opts.root); + let root_path = if should_use_rootless()? 
&& opts.root.eq(&PathBuf::from("/run/youki")) { + PathBuf::from("/tmp/rootless") + } else { + PathBuf::from(&opts.root) + }; fs::create_dir_all(&root_path)?; match opts.subcmd { From 427e72629fdc3c5e5f92860a7667b1c98c887151 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 17 Jun 2021 16:09:24 +0200 Subject: [PATCH 12/70] Identifier mapping names were not correct --- oci_spec/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 4fbc56371..d05efc6b9 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -559,9 +559,9 @@ pub enum LinuxSeccompOperator { #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] pub struct Linux { - #[serde(default, rename = "LinuxIDMapping")] + #[serde(default)] pub uid_mappings: Vec, - #[serde(default, rename = "LinuxIDMapping")] + #[serde(default)] pub gid_mappings: Vec, #[serde(default)] pub sysctl: HashMap, From 373c45782065d26e269a2fb2d3c1f83a163d1605 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 17 Jun 2021 16:10:02 +0200 Subject: [PATCH 13/70] Only one mapping needs to match --- src/rootless.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rootless.rs b/src/rootless.rs index 06e48e95d..f669f9a1e 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -87,7 +87,7 @@ fn is_id_mapped(id: &str, mappings: &Vec) -> Result { let id = id.parse::()?; Ok(mappings .iter() - .all(|m| id >= m.container_id && id <= m.container_id + m.size)) + .any(|m| id >= m.container_id && id <= m.container_id + m.size)) } /// Looks up the location of the newuidmap and newgidmap binaries which From e2077353927df792467089bce58c14306b8b9b96 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 17 Jun 2021 22:19:08 +0200 Subject: [PATCH 14/70] Prevent panic when resources are not specified --- 
src/process/fork.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/process/fork.rs b/src/process/fork.rs index b65e3afd4..ba0c22c97 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -69,7 +69,7 @@ pub fn fork_first>( // wait for child to fork init process and report back its pid let init_pid = parent.wait_for_child_ready(child)?; log::debug!("init pid is {:?}", init_pid); - if rootless.is_none() { + if rootless.is_none() && linux.resources.is_some() { cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?; } From 8e1621e96caa89aa7128287b5dc00015eb3e821f Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Fri, 18 Jun 2021 22:55:43 +0200 Subject: [PATCH 15/70] Fix wrong mapping --- src/rootless.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rootless.rs b/src/rootless.rs index f669f9a1e..971b34308 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -24,7 +24,7 @@ impl From<&Linux> for Rootless { newuidmap: None, newgidmap: None, uid_mappings: linux.uid_mappings.clone(), - gid_mappings: linux.uid_mappings.clone(), + gid_mappings: linux.gid_mappings.clone(), } } } From bc7fd5d024936b979955242f4b7fffb37771fb96 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Fri, 18 Jun 2021 23:51:37 +0200 Subject: [PATCH 16/70] Clippy and fmt --- src/create.rs | 13 +++---------- src/lib.rs | 2 +- src/main.rs | 2 +- src/process/fork.rs | 3 +-- src/process/message.rs | 1 - src/process/parent.rs | 16 +++++++--------- src/rootless.rs | 14 ++++++++------ 7 files changed, 21 insertions(+), 30 deletions(-) diff --git a/src/create.rs b/src/create.rs index 84360af38..20b97e27a 100644 --- a/src/create.rs +++ b/src/create.rs @@ -9,17 +9,17 @@ use nix::sched; use nix::unistd; use nix::unistd::{Gid, Uid}; -use crate::rootless::{Rootless, lookup_map_binaries, should_use_rootless}; -use crate::{cgroups, rootless}; use 
crate::container::{Container, ContainerStatus}; use crate::namespaces::Namespaces; use crate::notify_socket::NotifyListener; use crate::process::{fork, Process}; use crate::rootfs; +use crate::rootless::{lookup_map_binaries, should_use_rootless, Rootless}; use crate::stdio::FileDescriptor; use crate::tty; use crate::utils; use crate::{capabilities, command::Command}; +use crate::{cgroups, rootless}; /// This is the main structure which stores various commandline options given by /// high-level container runtime @@ -145,18 +145,11 @@ fn run_container>( None }; - let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id()); let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path)?; // first fork, which creates process, which will later create actual container process - match fork::fork_first( - pid_file, - rootless, - linux, - &container, - cmanager, - )? { + match fork::fork_first(pid_file, rootless, linux, &container, cmanager)? { // In the parent process, which called run_container Process::Parent(parent) => Ok(Process::Parent(parent)), // in child process diff --git a/src/lib.rs b/src/lib.rs index b49d6fcd5..75326ef25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,9 +13,9 @@ pub mod notify_socket; pub mod pipe; pub mod process; pub mod rootfs; +pub mod rootless; pub mod signal; pub mod start; pub mod stdio; pub mod tty; pub mod utils; -pub mod rootless; diff --git a/src/main.rs b/src/main.rs index ef84be87e..cca26625e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -81,7 +81,7 @@ fn main() -> Result<()> { } let root_path = if should_use_rootless()? 
&& opts.root.eq(&PathBuf::from("/run/youki")) { - PathBuf::from("/tmp/rootless") + PathBuf::from("/tmp/rootless") } else { PathBuf::from(&opts.root) }; diff --git a/src/process/fork.rs b/src/process/fork.rs index ba0c22c97..8a7a25198 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -14,10 +14,10 @@ use nix::unistd; use nix::unistd::Pid; use crate::cgroups::common::CgroupManager; +use crate::container::Container; use crate::container::ContainerStatus; use crate::process::{child, init, parent, Process}; use crate::rootless::Rootless; -use crate::{container::Container}; /// Function to perform the first fork for in order to run the container process pub fn fork_first>( @@ -31,7 +31,6 @@ pub fn fork_first>( let (mut parent, parent_channel) = parent::ParentProcess::new(rootless.clone())?; // create a new child process structure with sending end of parent process let mut child = child::ChildProcess::new(parent_channel)?; - // fork the process match unsafe { unistd::fork()? } { diff --git a/src/process/message.rs b/src/process/message.rs index 7e71373e1..386b4fb77 100644 --- a/src/process/message.rs +++ b/src/process/message.rs @@ -18,4 +18,3 @@ impl From for Message { } } } - diff --git a/src/process/parent.rs b/src/process/parent.rs index 80feaa2b8..bd1fe6d2f 100644 --- a/src/process/parent.rs +++ b/src/process/parent.rs @@ -5,8 +5,8 @@ use std::path::Path; use std::process::Command; use super::{MAX_EVENTS, WAIT_FOR_CHILD}; -use crate::process::WAIT_FOR_MAPPING; use crate::process::message::Message; +use crate::process::WAIT_FOR_MAPPING; use crate::rootless::Rootless; use crate::utils; use anyhow::Context; @@ -31,9 +31,7 @@ impl ParentProcess { /// Create new Parent process structure pub fn new(rootless: Option) -> Result<(Self, ParentChannel)> { let (parent_channel, child_channel) = Self::setup_pipes(rootless)?; - let parent = Self { - child_channel, - }; + let parent = Self { child_channel }; Ok((parent, parent_channel)) } @@ -131,7 +129,7 @@ struct 
ChildChannel { sender: Sender, receiver: Receiver, poll: Poll, - rootless: Option + rootless: Option, } impl ChildChannel { @@ -209,7 +207,7 @@ impl ChildChannel { fn notify_mapping_written(&mut self) -> Result<()> { self.sender - .write_all(&(Message::MappingWritten as u8).to_be_bytes())?; + .write_all(&(Message::MappingWritten as u8).to_be_bytes())?; Ok(()) } @@ -221,7 +219,7 @@ impl ChildChannel { rootless.newuidmap.as_deref(), ) } - + fn write_gid_mapping(&self, target_pid: Pid) -> Result<()> { let rootless = self.rootless.as_ref().unwrap(); write_id_mapping( @@ -234,7 +232,7 @@ impl ChildChannel { fn write_id_mapping( map_file: &str, - mappings: &Vec, + mappings: &[LinuxIdMapping], map_binary: Option<&Path>, ) -> Result<()> { let mappings: Vec = mappings @@ -251,4 +249,4 @@ fn write_id_mapping( } Ok(()) -} \ No newline at end of file +} diff --git a/src/rootless.rs b/src/rootless.rs index 971b34308..8918b0289 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -66,15 +66,17 @@ pub fn validate(spec: &Spec) -> Result<()> { } fn validate_mounts( - mounts: &Vec, - uid_mappings: &Vec, - gid_mappings: &Vec, + mounts: &[Mount], + uid_mappings: &[LinuxIdMapping], + gid_mappings: &[LinuxIdMapping], ) -> Result<()> { for mount in mounts { for opt in &mount.options { if opt.starts_with("uid=") && !is_id_mapped(&opt[4..], uid_mappings)? { bail!("Mount {:?} specifies option {} which is not mapped inside the rootless container", mount, opt); - } else if opt.starts_with("gid=") && !is_id_mapped(&opt[4..], gid_mappings)? { + } + + if opt.starts_with("gid=") && !is_id_mapped(&opt[4..], gid_mappings)? 
{ bail!("Mount {:?} specifies option {} which is not mapped inside the rootless container", mount, opt); } } @@ -83,7 +85,7 @@ fn validate_mounts( Ok(()) } -fn is_id_mapped(id: &str, mappings: &Vec) -> Result { +fn is_id_mapped(id: &str, mappings: &[LinuxIdMapping]) -> Result { let id = id.parse::()?; Ok(mappings .iter() @@ -116,4 +118,4 @@ fn lookup_map_binary(binary: &str) -> Result> { } Ok(None) -} \ No newline at end of file +} From 726c06cdcf945823213f83808466e282c7f1d8f9 Mon Sep 17 00:00:00 2001 From: utam0k Date: Sat, 19 Jun 2021 14:39:59 +0900 Subject: [PATCH 17/70] remove the cargo-when dependency. --- .github/workflows/main.yml | 2 -- README.md | 4 ---- build.sh | 5 ++--- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0e4c4a9b9..1675687d4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,7 +25,6 @@ jobs: with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features - - run: cargo install cargo-when - name: Build run: ./build.sh - name: Run tests @@ -48,7 +47,6 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable - - run: cargo install cargo-when - name: Build run: ./build.sh - uses: actions/setup-go@v2 diff --git a/README.md b/README.md index 374219f7a..09dd2c3df 100644 --- a/README.md +++ b/README.md @@ -42,10 +42,6 @@ For other platforms, please use the devcontainer that we prepared. 
## Building -```sh -$ cargo install cargo-when # installs prerequisite for building youki -``` - ```sh $ git clone git@github.com:containers/youki.git $ cd youki diff --git a/build.sh b/build.sh index d8de37c30..1beaa739a 100755 --- a/build.sh +++ b/build.sh @@ -8,8 +8,7 @@ VERSION=debug if [[ "$1" == "--release" ]]; then VERSION=release fi -cargo when --channel=stable build --verbose $TGT $1 && \ -cargo when --channel=beta build --verbose $TGT $1 && \ -cargo when --channel=nightly build --verbose --features nightly $TGT $1 && \ + +cargo build --verbose $TGT $1 rm -f youki cp target/$TARGET/$VERSION/youki . From 4909c3b50abe94d88be05796c00286bc3c57fd0d Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 19 Jun 2021 13:43:51 +0200 Subject: [PATCH 18/70] Address review comments - should_use_rootless doesn't need Result type - add warning regarding current rootless limitations - make lookup_map_binary more concise --- src/create.rs | 5 ++++- src/main.rs | 2 +- src/rootless.rs | 20 ++++++++------------ 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/create.rs b/src/create.rs index 20b97e27a..b24a73700 100644 --- a/src/create.rs +++ b/src/create.rs @@ -132,8 +132,11 @@ fn run_container>( let linux = spec.linux.as_ref().unwrap(); let namespaces: Namespaces = linux.namespaces.clone().into(); - let rootless = if let Ok(true) = should_use_rootless() { + let rootless = if should_use_rootless() { log::debug!("rootless container should be created"); + log::warn!( + "resource constraints and multi id mapping is unimplemented for rootless containers" + ); rootless::validate(&spec)?; let mut rootless = Rootless::from(linux); if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? 
{ diff --git a/src/main.rs b/src/main.rs index cca26625e..d8ceb7884 100644 --- a/src/main.rs +++ b/src/main.rs @@ -80,7 +80,7 @@ fn main() -> Result<()> { eprintln!("log init failed: {:?}", e); } - let root_path = if should_use_rootless()? && opts.root.eq(&PathBuf::from("/run/youki")) { + let root_path = if should_use_rootless() && opts.root.eq(&PathBuf::from("/run/youki")) { PathBuf::from("/tmp/rootless") } else { PathBuf::from(&opts.root) diff --git a/src/rootless.rs b/src/rootless.rs index 8918b0289..799b39fa8 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -30,16 +30,16 @@ impl From<&Linux> for Rootless { } /// Checks if rootless mode should be used -pub fn should_use_rootless() -> Result { +pub fn should_use_rootless() -> bool { if !nix::unistd::geteuid().is_root() { - return Ok(true); + return true; } if let Ok("true") = std::env::var("YOUKI_USE_ROOTLESS").as_deref() { - return Ok(true); + return true; } - Ok(false) + false } /// Validates that the spec contains the required information for @@ -110,12 +110,8 @@ pub fn lookup_map_binaries(spec: &Linux) -> Result> { fn lookup_map_binary(binary: &str) -> Result> { let paths = env::var("PATH")?; - for p in paths.split_terminator(':') { - let binary_path = PathBuf::from(p).join(binary); - if binary_path.exists() { - return Ok(Some(binary_path)); - } - } - - Ok(None) + Ok(paths + .split_terminator(':') + .find(|p| PathBuf::from(p).join(binary).exists()) + .map(PathBuf::from)) } From a0da5378a19de150e79a968e1db47d1f1d292bcf Mon Sep 17 00:00:00 2001 From: Siva Renganathan Date: Sun, 20 Jun 2021 06:02:02 +0530 Subject: [PATCH 19/70] Add unit tests for tty module Reuse tmpdir implementation from cgroup --- Cargo.lock | 87 ++++++++++++++++++++++++++++ Cargo.toml | 1 + src/cgroups/test.rs | 53 +---------------- src/cgroups/v1/devices.rs | 3 +- src/cgroups/v1/freezer.rs | 3 +- src/cgroups/v1/hugetlb.rs | 3 +- src/cgroups/v1/memory.rs | 3 +- src/cgroups/v1/network_classifier.rs | 4 +- 
src/cgroups/v1/network_priority.rs | 3 +- src/cgroups/v1/pids.rs | 4 +- src/cgroups/v2/cpu.rs | 3 +- src/tty.rs | 76 ++++++++++++++++++++++++ src/utils.rs | 51 ++++++++++++++++ 13 files changed, 233 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8578fa343..a9a81ed7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -324,6 +324,15 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if", +] + [[package]] name = "itoa" version = "0.4.7" @@ -342,6 +351,15 @@ version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36" +[[package]] +name = "lock_api" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0382880606dff6d15c9476c416d18690b72742aa7b605bb6dd6ec9030fbf07eb" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.14" @@ -485,6 +503,31 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + [[package]] name = "pin-project-lite" version = "0.2.6" @@ -605,6 +648,15 @@ dependencies = [ "getrandom", ] 
+[[package]] +name = "redox_syscall" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.5.4" @@ -628,6 +680,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "serde" version = "1.0.126" @@ -659,12 +717,40 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0bccbcf40c8938196944a3da0e133e031a33f4d6b72db3bda3cc556e361905d" +dependencies = [ + "lazy_static", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2acd6defeddb41eb60bb468f8825d0cfd0c2a76bc03bfd235b6a1dc4f6a1ad5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "slab" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527" +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + [[package]] name = "strsim" version = "0.10.0" @@ -818,4 +904,5 @@ dependencies = [ "regex", "serde", "serde_json", + "serial_test", ] diff --git a/Cargo.toml b/Cargo.toml index 6ed6f9b10..cfec9e42b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,3 +26,4 @@ oci_spec = { version = "0.1.0", path = 
"./oci_spec" } [dev-dependencies] oci_spec = { version = "0.1.0", path = "./oci_spec", features = ["proptests"] } quickcheck = "1" +serial_test = "0.5.1" diff --git a/src/cgroups/test.rs b/src/cgroups/test.rs index 065986deb..ffad8b3ed 100644 --- a/src/cgroups/test.rs +++ b/src/cgroups/test.rs @@ -2,58 +2,14 @@ use anyhow::Result; use std::{ - fs, - io::Write, - ops::Deref, + io::Write, path::{Path, PathBuf}, }; use oci_spec::LinuxCpu; -pub struct TempDir { - path: Option, -} - -impl TempDir { - pub fn new>(path: P) -> Result { - let p = path.into(); - std::fs::create_dir_all(&p)?; - Ok(Self { path: Some(p) }) - } - - pub fn path(&self) -> &Path { - self.path - .as_ref() - .expect("temp dir has already been removed") - } - - pub fn remove(&mut self) { - if let Some(p) = &self.path { - let _ = fs::remove_dir_all(p); - self.path = None; - } - } -} - -impl Drop for TempDir { - fn drop(&mut self) { - self.remove(); - } -} - -impl AsRef for TempDir { - fn as_ref(&self) -> &Path { - self.path() - } -} +use crate::utils::{create_temp_dir, TempDir}; -impl Deref for TempDir { - type Target = Path; - - fn deref(&self) -> &Self::Target { - self.path() - } -} pub fn setup(testname: &str, cgroup_file: &str) -> (TempDir, PathBuf) { let tmp = create_temp_dir(testname).expect("create temp directory for test"); @@ -76,11 +32,6 @@ pub fn set_fixture(temp_dir: &Path, filename: &str, val: &str) -> Result Result { - let dir = TempDir::new(std::env::temp_dir().join(test_name))?; - Ok(dir) -} - pub struct LinuxCpuBuilder { resource: LinuxCpu, } diff --git a/src/cgroups/v1/devices.rs b/src/cgroups/v1/devices.rs index ce49d2903..2599ed06a 100644 --- a/src/cgroups/v1/devices.rs +++ b/src/cgroups/v1/devices.rs @@ -98,7 +98,8 @@ impl Devices { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::{LinuxDeviceCgroup, LinuxDeviceType}; use 
std::fs::read_to_string; diff --git a/src/cgroups/v1/freezer.rs b/src/cgroups/v1/freezer.rs index d5fea3cb7..fbe02192b 100644 --- a/src/cgroups/v1/freezer.rs +++ b/src/cgroups/v1/freezer.rs @@ -116,7 +116,8 @@ impl Freezer { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::FreezerState; #[test] diff --git a/src/cgroups/v1/hugetlb.rs b/src/cgroups/v1/hugetlb.rs index ad363a821..d36b68516 100644 --- a/src/cgroups/v1/hugetlb.rs +++ b/src/cgroups/v1/hugetlb.rs @@ -58,7 +58,8 @@ impl Hugetlb { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::LinuxHugepageLimit; use std::fs::read_to_string; diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index 9612f81dd..8f4cb6b0c 100644 --- a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -239,7 +239,8 @@ impl Memory { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::LinuxMemory; #[test] diff --git a/src/cgroups/v1/network_classifier.rs b/src/cgroups/v1/network_classifier.rs index 0ecd9c873..d7bf1f4ee 100644 --- a/src/cgroups/v1/network_classifier.rs +++ b/src/cgroups/v1/network_classifier.rs @@ -36,8 +36,8 @@ impl NetworkClassifier { #[cfg(test)] mod tests { - use crate::cgroups::test::{create_temp_dir, set_fixture}; - + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use super::*; #[test] diff --git a/src/cgroups/v1/network_priority.rs b/src/cgroups/v1/network_priority.rs index d12c66fe5..05f6c3bf6 100644 --- a/src/cgroups/v1/network_priority.rs +++ b/src/cgroups/v1/network_priority.rs @@ -36,7 +36,8 @@ impl NetworkPriority { #[cfg(test)] mod tests { use 
super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::LinuxInterfacePriority; #[test] diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index bb2f3af45..577090778 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -46,8 +46,8 @@ impl Pids { #[cfg(test)] mod tests { - use crate::cgroups::test::{create_temp_dir, set_fixture}; - + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use super::*; use oci_spec::LinuxPids; diff --git a/src/cgroups/v2/cpu.rs b/src/cgroups/v2/cpu.rs index ae39f7670..184be42ba 100644 --- a/src/cgroups/v2/cpu.rs +++ b/src/cgroups/v2/cpu.rs @@ -86,7 +86,8 @@ impl Cpu { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture, setup, LinuxCpuBuilder}; + use crate::cgroups::test::{set_fixture, setup, LinuxCpuBuilder}; + use crate::utils::create_temp_dir; use std::fs; #[test] diff --git a/src/tty.rs b/src/tty.rs index b6bf4be34..40407cc2d 100644 --- a/src/tty.rs +++ b/src/tty.rs @@ -68,3 +68,79 @@ pub fn setup_console(console_fd: FileDescriptor) -> Result<()> { close(console_fd.as_raw_fd())?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + use std::env; + use std::fs::{self, File}; + use std::os::unix::net::UnixListener; + use std::path::PathBuf; + + use serial_test::serial; + + use crate::utils::{create_temp_dir, TempDir}; + + + fn setup(testname: &str) -> Result<(TempDir, PathBuf, PathBuf)> { + let testdir = create_temp_dir(testname)?; + let rundir_path = Path::join(&testdir, "run"); + let _ = fs::create_dir(&rundir_path)?; + let socket_path = Path::new(&rundir_path).join("socket"); + let _ = File::create(&socket_path); + env::set_current_dir(&testdir)?; + Ok((testdir, rundir_path, socket_path)) + } + + + #[test] + #[serial] + fn test_setup_console_socket() { + let init = setup("test_setup_console_socket"); + assert!(init.is_ok()); + let 
(testdir, rundir_path, socket_path) = init.unwrap(); + let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); + assert!(lis.is_ok()); + let fd = setup_console_socket(&&rundir_path, &socket_path); + assert!(fd.is_ok()); + assert_ne!(fd.unwrap().as_raw_fd(), -1); + } + + #[test] + #[serial] + fn test_setup_console_socket_empty() { + let init = setup("test_setup_console_socket_empty"); + assert!(init.is_ok()); + let (_testdir, rundir_path, socket_path) = init.unwrap(); + let fd = setup_console_socket(&rundir_path, &socket_path); + assert!(fd.is_ok()); + assert_eq!(fd.unwrap().as_raw_fd(), -1); + } + + #[test] + #[serial] + fn test_setup_console_socket_invalid() { + let init = setup("test_setup_console_socket_invalid"); + assert!(init.is_ok()); + let (testdir, rundir_path, socket_path) = init.unwrap(); + let _socket = File::create(Path::join(&testdir, "console-socket")); + assert!(_socket.is_ok()); + let fd = setup_console_socket(&rundir_path, &socket_path); + assert!(fd.is_err()); + } + + #[test] + #[serial] + fn test_setup_console() { + let init = setup("test_setup_console"); + assert!(init.is_ok()); + let (testdir, rundir_path, socket_path) = init.unwrap(); + let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); + assert!(lis.is_ok()); + let fd = setup_console_socket(&&rundir_path, &socket_path); + let status = setup_console(fd.unwrap()); + assert!(status.is_ok()); + } +} + diff --git a/src/utils.rs b/src/utils.rs index 441103dde..8489ecc89 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,6 +3,7 @@ use std::env; use std::ffi::CString; use std::fs; +use std::ops::Deref; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -95,6 +96,56 @@ pub fn write_file, C: AsRef<[u8]>>(path: P, contents: C) -> Resul Ok(()) } +pub struct TempDir { + path: Option, +} + +impl TempDir { + pub fn new>(path: P) -> Result { + let p = path.into(); + std::fs::create_dir_all(&p)?; + Ok(Self { path: Some(p) }) + } + + pub fn path(&self) -> &Path { + 
self.path + .as_ref() + .expect("temp dir has already been removed") + } + + pub fn remove(&mut self) { + if let Some(p) = &self.path { + let _ = fs::remove_dir_all(p); + self.path = None; + } + } +} + +impl Drop for TempDir { + fn drop(&mut self) { + self.remove(); + } +} + +impl AsRef for TempDir { + fn as_ref(&self) -> &Path { + self.path() + } +} + +impl Deref for TempDir { + type Target = Path; + + fn deref(&self) -> &Self::Target { + self.path() + } +} + +pub fn create_temp_dir(test_name: &str) -> Result { + let dir = TempDir::new(std::env::temp_dir().join(test_name))?; + Ok(dir) +} + #[cfg(test)] mod tests { use super::*; From b20918121fea3477ef5a8cb0075527af250b8325 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 19 Jun 2021 23:25:33 +0200 Subject: [PATCH 20/70] Extend info cmd with version and os --- src/info.rs | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 42 +++------------------ 3 files changed, 111 insertions(+), 36 deletions(-) create mode 100644 src/info.rs diff --git a/src/info.rs b/src/info.rs new file mode 100644 index 000000000..b726e25d5 --- /dev/null +++ b/src/info.rs @@ -0,0 +1,104 @@ +use procfs::{CpuInfo, Meminfo}; +use std::{fs, path::Path}; + +use crate::cgroups; + +pub fn print_youki() { + println!("{:<18}{}", "Version", env!("CARGO_PKG_VERSION")); +} + +pub fn print_kernel() { + let uname = nix::sys::utsname::uname(); + println!("{:<18}{}", "Kernel-Release", uname.release()); + println!("{:<18}{}", "Kernel-Version", uname.version()); + println!("{:<18}{}", "Architecture", uname.machine()); +} + +// see https://www.freedesktop.org/software/systemd/man/os-release.html +pub fn print_os() { + if let Some(os) = try_read_os_from("/etc/os-release") { + println!("{:<18}{}", "Operating System", os); + } else if let Some(os) = try_read_os_from("/usr/lib/os-release") { + println!("{:<18}{}", "Operating System", os); + } +} + +fn try_read_os_from>(path: P) -> 
Option { + let os_release = path.as_ref(); + if !os_release.exists() { + return None; + } + + if let Ok(release_content) = fs::read_to_string(path) { + let pretty = find_parameter(&release_content, "PRETTY_NAME"); + + if let Some(pretty) = pretty { + return Some(pretty.trim_matches('"').to_owned()); + } + + let name = find_parameter(&release_content, "NAME"); + let version = find_parameter(&release_content, "VERSION"); + + if let (Some(name), Some(version)) = (name, version) { + return Some(format!( + "{} {}", + name.trim_matches('"'), + version.trim_matches('"') + )); + } + } + + None +} + +fn find_parameter<'a>(content: &'a str, param_name: &str) -> Option<&'a str> { + let param_value = content + .lines() + .find(|l| l.starts_with(param_name)) + .map(|l| l.split_terminator('=').last()); + + if let Some(Some(value)) = param_value { + return Some(value); + } + + None +} + +pub fn print_hardware() { + if let Ok(cpu_info) = CpuInfo::new() { + println!("{:<18}{}", "Cores", cpu_info.num_cores()); + } + + if let Ok(mem_info) = Meminfo::new() { + println!( + "{:<18}{}", + "Total Memory", + mem_info.mem_total / u64::pow(1024, 2) + ); + } +} + +pub fn print_cgroups() { + if let Ok(cgroup_fs) = cgroups::common::get_supported_cgroup_fs() { + let cgroup_fs: Vec = cgroup_fs.into_iter().map(|c| c.to_string()).collect(); + println!("{:<18}{}", "cgroup version", cgroup_fs.join(" and ")); + } + + println!("cgroup mounts"); + if let Ok(v1_mounts) = cgroups::v1::util::list_subsystem_mount_points() { + let mut v1_mounts: Vec = v1_mounts + .iter() + .map(|kv| format!(" {:<16}{}", kv.0, kv.1.display())) + .collect(); + + v1_mounts.sort(); + for cgroup_mount in v1_mounts { + println!("{}", cgroup_mount); + } + } + + let unified = cgroups::v2::util::get_unified_mount_point(); + if let Ok(mount_point) = unified { + println!(" {:<16}{}", "unified", mount_point.display()); + } +} diff --git a/src/lib.rs b/src/lib.rs index 75326ef25..3bb33ce15 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 
+7,7 @@ pub mod cgroups; pub mod command; pub mod container; pub mod create; +pub mod info; pub mod logger; pub mod namespaces; pub mod notify_socket; diff --git a/src/main.rs b/src/main.rs index d8ceb7884..4df5f55d2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ use nix::sys::signal as nix_signal; use youki::command::linux::LinuxCommand; use youki::container::{Container, ContainerStatus}; use youki::create; +use youki::info::{print_cgroups, print_hardware, print_kernel, print_os, print_youki}; use youki::rootless::should_use_rootless; use youki::signal; use youki::start; @@ -171,42 +172,11 @@ fn main() -> Result<()> { } SubCommand::Info => { - let uname = nix::sys::utsname::uname(); - println!("{:<18}{}", "Kernel-Release", uname.release()); - println!("{:<18}{}", "Kernel-Version", uname.version()); - println!("{:<18}{}", "Architecture", uname.machine()); - - let cpu_info = procfs::CpuInfo::new()?; - println!("{:<18}{}", "Cores", cpu_info.num_cores()); - let mem_info = procfs::Meminfo::new()?; - println!( - "{:<18}{}", - "Total Memory", - mem_info.mem_total / u64::pow(1024, 2) - ); - - let cgroup_fs: Vec = cgroups::common::get_supported_cgroup_fs()? - .into_iter() - .map(|c| c.to_string()) - .collect(); - println!("{:<18}{}", "cgroup version", cgroup_fs.join(" and ")); - - println!("cgroup mounts"); - let mut cgroup_v1_mounts: Vec = - cgroups::v1::util::list_subsystem_mount_points()? 
- .iter() - .map(|kv| format!(" {:<16}{:?}", kv.0, kv.1)) - .collect(); - - cgroup_v1_mounts.sort(); - for cgroup_mount in cgroup_v1_mounts { - println!("{}", cgroup_mount); - } - - let unified = cgroups::v2::util::get_unified_mount_point(); - if let Ok(mount_point) = unified { - println!(" {:<16}{:?}", "unified", mount_point); - } + print_youki(); + print_kernel(); + print_os(); + print_hardware(); + print_cgroups(); Ok(()) } From 4e4dcb8da176e3eece83a47e1a220e8f9508331a Mon Sep 17 00:00:00 2001 From: utam0k Date: Mon, 21 Jun 2021 23:04:46 +0900 Subject: [PATCH 21/70] Use `assert!` instead of `assert_eq!` when comparing a boolean. --- src/utils.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index 8489ecc89..1711de195 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -162,12 +162,9 @@ mod tests { #[test] fn test_join_absolute_path_error() { - assert_eq!( - PathBuf::from("sample/a/") - .join_absolute_path(&PathBuf::from("b/c")) - .is_err(), - true - ); + assert!(PathBuf::from("sample/a/") + .join_absolute_path(&PathBuf::from("b/c")) + .is_err(),); } #[test] From 8abb9f5d85fb2c237c8532fd34424ba43d7f1f0d Mon Sep 17 00:00:00 2001 From: Nimrod Shneor Date: Sun, 6 Jun 2021 06:50:18 +0300 Subject: [PATCH 22/70] Add support for systemd managed cgroups --- .github/workflows/main.yml | 4 + Cargo.lock | 124 ++++++++++- Cargo.toml | 2 + README.md | 21 +- src/cgroups/common.rs | 27 ++- src/cgroups/test.rs | 5 +- src/cgroups/v1/freezer.rs | 2 +- src/cgroups/v1/hugetlb.rs | 2 +- src/cgroups/v1/memory.rs | 2 +- src/cgroups/v1/network_classifier.rs | 2 +- src/cgroups/v1/pids.rs | 2 +- src/cgroups/v2/mod.rs | 2 + src/cgroups/v2/systemd_manager.rs | 304 +++++++++++++++++++++++++++ src/create.rs | 11 +- src/dbus/client.rs | 33 +++ src/dbus/mod.rs | 2 + src/lib.rs | 1 + src/main.rs | 13 +- src/tty.rs | 17 +- 19 files changed, 542 insertions(+), 34 deletions(-) create mode 100644 src/cgroups/v2/systemd_manager.rs create mode 
100644 src/dbus/client.rs create mode 100644 src/dbus/mod.rs diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 68e805cd2..69650e881 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,6 +12,8 @@ jobs: steps: - uses: actions/checkout@v2 - run: rustup component add clippy + - run: sudo apt-get -y update + - run: sudo apt-get install -y pkg-config libsystemd-dev libdbus-glib-1-dev - uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} @@ -30,6 +32,8 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: stable + - run: sudo apt-get -y update + - run: sudo apt-get install -y pkg-config libsystemd-dev libdbus-glib-1-dev - run: cargo install cargo-when - name: Build run: ./build.sh diff --git a/Cargo.lock b/Cargo.lock index a9a81ed7c..6c5b12f7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -46,6 +46,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "build-env" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1522ac6ee801a11bf9ef3f80403f4ede6eb41291fac3dde3de09989679305f25" + [[package]] name = "byteorder" version = "1.4.3" @@ -69,6 +75,12 @@ version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -126,7 +138,27 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", +] + +[[package]] +name = "cstr-argument" +version 
= "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20bd4e8067c20c7c3a4dea759ef91d4b18418ddb5bd8837ef6e2f2f93ca7ccbb" +dependencies = [ + "cfg-if 0.1.10", + "memchr", +] + +[[package]] +name = "dbus" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f597e08dfa79b593f23bbfc7840b23b2c5aa2e3a98d8e68b67b5b9ff800dc0db" +dependencies = [ + "libc", + "libdbus-sys", ] [[package]] @@ -166,12 +198,39 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd3aec53de10fe96d7d8c565eb17f2c687bb5518a2ec453b5b1252964526abe0" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "crc32fast", "libc", "miniz_oxide", ] +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63f713f8b2aa9e24fec85b0e290c56caee12e3b6ae0aeeda238a75b28251afd6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7684cf33bb7f28497939e8c7cf17e3e4e3b8d9a0080ffa4f8ae2f515442ee855" + [[package]] name = "futures" version = "0.3.15" @@ -279,7 +338,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi", ] @@ -330,7 +389,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if", + "cfg-if 
1.0.0", ] [[package]] @@ -351,6 +410,26 @@ version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36" +[[package]] +name = "libdbus-sys" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc12a3bc971424edbbf7edaf6e5740483444db63aa8e23d3751ff12a30f306f0" +dependencies = [ + "pkg-config", +] + +[[package]] +name = "libsystemd-sys" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e03fd580bcecda68dcdcd5297085ade6a3dc552cd8b030d2b94a9b089ef7ab8" +dependencies = [ + "build-env", + "libc", + "pkg-config", +] + [[package]] name = "lock_api" version = "0.4.4" @@ -366,7 +445,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -424,7 +503,7 @@ checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" dependencies = [ "bitflags", "cc", - "cfg-if", + "cfg-if 1.0.0", "libc", ] @@ -436,7 +515,7 @@ checksum = "5c3728fec49d363a50a8828a190b379a446cc5cf085c06259bbbeb34447e4ec7" dependencies = [ "bitflags", "cc", - "cfg-if", + "cfg-if 1.0.0", "libc", "memoffset", ] @@ -520,7 +599,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", @@ -540,6 +619,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c" + [[package]] name = "prctl" version = "1.0.0" @@ -768,6 +853,21 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "systemd" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f722cabda922e471742300045f56dbaa53fafbb4520fca304e51258019bfe91d" +dependencies = [ + "cstr-argument", + "foreign-types", + "libc", + "libsystemd-sys", + "log", + "memchr", + "utf8-cstr", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -834,6 +934,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" +[[package]] +name = "utf8-cstr" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55bcbb425141152b10d5693095950b51c3745d019363fc2929ffd8f61449b628" + [[package]] name = "vec_map" version = "0.8.2" @@ -891,6 +997,7 @@ dependencies = [ "caps", "chrono", "clap", + "dbus", "futures", "libc", "log", @@ -905,4 +1012,5 @@ dependencies = [ "serde", "serde_json", "serial_test", + "systemd", ] diff --git a/Cargo.toml b/Cargo.toml index cfec9e42b..afba5c345 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,8 @@ once_cell = "1.6.0" futures = { version = "0.3", features = ["thread-pool"] } regex = "1.5" oci_spec = { version = "0.1.0", path = "./oci_spec" } +systemd = { version = "0.8", default-features = false } +dbus = "0.9.2" [dev-dependencies] oci_spec = { version = "0.1.0", path = "./oci_spec", features = ["proptests"] } diff --git a/README.md b/README.md index 374219f7a..3efcd974f 100644 --- a/README.md +++ b/README.md @@ -40,12 +40,29 @@ For other platforms, please use the devcontainer that we prepared. 
- Rust(See [here](https://www.rust-lang.org/tools/install)) - Docker(See [here](https://docs.docker.com/engine/install)) -## Building +## Dependencies +```sh +$ cargo install cargo-when +``` + +### Debian, Ubuntu and related distributions +```sh +$ sudo dnf install \ + pkg-config \ + libsystemd-dev \ + libdbus-glib-1-dev +``` + +### Fedora, Centos, RHEL and related distributions ```sh -$ cargo install cargo-when # installs prerequisite for building youki +$ sudo dnf install \ + pkg-config \ + systemd-dev \ + dbus-devel ``` +## Build ```sh $ git clone git@github.com:containers/youki.git $ cd youki diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index 56f3b1a57..26f626805 100644 --- a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -6,11 +6,11 @@ use std::{ path::{Path, PathBuf}, }; - use anyhow::{bail, Context, Result}; use nix::unistd::Pid; use oci_spec::LinuxResources; use procfs::process::Process; +use systemd::daemon::booted; use crate::cgroups::v1; use crate::cgroups::v2; @@ -91,7 +91,10 @@ pub fn get_supported_cgroup_fs() -> Result> { Ok(cgroups) } -pub fn create_cgroup_manager>(cgroup_path: P) -> Result> { +pub fn create_cgroup_manager>( + cgroup_path: P, + systemd_cgroup: bool, +) -> Result> { let cgroup_mount = Process::myself()? .mountinfo()? .into_iter() @@ -109,6 +112,16 @@ pub fn create_cgroup_manager>(cgroup_path: P) -> Result { log::info!("cgroup manager V2 will be used"); + if systemd_cgroup { + if !booted()? { + bail!("systemd cgroup flag passed, but systemd support for managing cgroups is not available"); + } + log::info!("systemd cgroup manager will be used"); + return Ok(Box::new(v2::SystemDCGroupManager::new( + cgroup2.mount_point, + cgroup_path.into(), + )?)); + } Ok(Box::new(v2::manager::Manager::new( cgroup2.mount_point, cgroup_path.into(), @@ -119,6 +132,16 @@ pub fn create_cgroup_manager>(cgroup_path: P) -> Result { log::info!("cgroup manager V2 will be used"); + if systemd_cgroup { + if !booted()? 
{ + bail!("systemd cgroup flag passed, but systemd support for managing cgroups is not available"); + } + log::info!("systemd cgroup manager will be used"); + return Ok(Box::new(v2::SystemDCGroupManager::new( + cgroup2.mount_point, + cgroup_path.into(), + )?)); + } Ok(Box::new(v2::manager::Manager::new( cgroup2.mount_point, cgroup_path.into(), diff --git a/src/cgroups/test.rs b/src/cgroups/test.rs index ffad8b3ed..57ad71ef8 100644 --- a/src/cgroups/test.rs +++ b/src/cgroups/test.rs @@ -2,14 +2,13 @@ use anyhow::Result; use std::{ - io::Write, + io::Write, path::{Path, PathBuf}, }; use oci_spec::LinuxCpu; -use crate::utils::{create_temp_dir, TempDir}; - +use crate::utils::{create_temp_dir, TempDir}; pub fn setup(testname: &str, cgroup_file: &str) -> (TempDir, PathBuf) { let tmp = create_temp_dir(testname).expect("create temp directory for test"); diff --git a/src/cgroups/v1/freezer.rs b/src/cgroups/v1/freezer.rs index fbe02192b..5ea995e93 100644 --- a/src/cgroups/v1/freezer.rs +++ b/src/cgroups/v1/freezer.rs @@ -117,7 +117,7 @@ impl Freezer { mod tests { use super::*; use crate::cgroups::test::set_fixture; - use crate::utils::create_temp_dir; + use crate::utils::create_temp_dir; use oci_spec::FreezerState; #[test] diff --git a/src/cgroups/v1/hugetlb.rs b/src/cgroups/v1/hugetlb.rs index d36b68516..ad1a2604b 100644 --- a/src/cgroups/v1/hugetlb.rs +++ b/src/cgroups/v1/hugetlb.rs @@ -59,7 +59,7 @@ impl Hugetlb { mod tests { use super::*; use crate::cgroups::test::set_fixture; - use crate::utils::create_temp_dir; + use crate::utils::create_temp_dir; use oci_spec::LinuxHugepageLimit; use std::fs::read_to_string; diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index 8f4cb6b0c..f00a7f450 100644 --- a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -240,7 +240,7 @@ impl Memory { mod tests { use super::*; use crate::cgroups::test::set_fixture; - use crate::utils::create_temp_dir; + use crate::utils::create_temp_dir; use oci_spec::LinuxMemory; 
#[test] diff --git a/src/cgroups/v1/network_classifier.rs b/src/cgroups/v1/network_classifier.rs index d7bf1f4ee..88da25653 100644 --- a/src/cgroups/v1/network_classifier.rs +++ b/src/cgroups/v1/network_classifier.rs @@ -36,9 +36,9 @@ impl NetworkClassifier { #[cfg(test)] mod tests { + use super::*; use crate::cgroups::test::set_fixture; use crate::utils::create_temp_dir; - use super::*; #[test] fn test_apply_network_classifier() { diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index 577090778..e41153db1 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -46,9 +46,9 @@ impl Pids { #[cfg(test)] mod tests { + use super::*; use crate::cgroups::test::set_fixture; use crate::utils::create_temp_dir; - use super::*; use oci_spec::LinuxPids; #[test] diff --git a/src/cgroups/v2/mod.rs b/src/cgroups/v2/mod.rs index cea672f02..a3c10f481 100644 --- a/src/cgroups/v2/mod.rs +++ b/src/cgroups/v2/mod.rs @@ -7,4 +7,6 @@ mod io; pub mod manager; mod memory; mod pids; +pub mod systemd_manager; pub mod util; +pub use systemd_manager::SystemDCGroupManager; diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs new file mode 100644 index 000000000..6e01e3050 --- /dev/null +++ b/src/cgroups/v2/systemd_manager.rs @@ -0,0 +1,304 @@ +use std::{ + fs::{self}, + os::unix::fs::PermissionsExt, +}; + +use anyhow::{anyhow, bail, Result}; +use nix::unistd::Pid; +use oci_spec::LinuxResources; +use std::path::{Path, PathBuf}; + +use super::{cpu::Cpu, cpuset::CpuSet, hugetlb::HugeTlb, io::Io, memory::Memory, pids::Pids}; +use crate::cgroups::common; +use crate::cgroups::common::CgroupManager; +use crate::cgroups::v2::controller::Controller; +use crate::cgroups::v2::controller_type::ControllerType; +use crate::utils::PathBufExt; + +const CGROUP_PROCS: &str = "cgroup.procs"; +const CGROUP_CONTROLLERS: &str = "cgroup.controllers"; +const CGROUP_SUBTREE_CONTROL: &str = "cgroup.subtree_control"; + +// v2 systemd only supports cpu, io, memory and 
pids. +const CONTROLLER_TYPES: &[ControllerType] = &[ + ControllerType::Cpu, + ControllerType::Io, + ControllerType::Memory, + ControllerType::Pids, +]; + +/// SystemDCGroupManager is a driver for managing cgroups via systemd. +pub struct SystemDCGroupManager { + root_path: PathBuf, + cgroups_path: CgroupsPath, +} + +/// Represents the systemd cgroups path: +/// It should be of the form [slice]:[scope_prefix]:[name]. +/// The slice is the "parent" and should be expanded properly, +/// see expand_slice below. +struct CgroupsPath { + parent: String, + scope: String, + name: String, +} + +impl SystemDCGroupManager { + pub fn new(root_path: PathBuf, cgroups_path: PathBuf) -> Result { + // cgroups path may never be empty as it is defaulted to `/youki` + // see 'get_cgroup_path' under utils.rs. + // if cgroups_path was provided it should be of the form [slice]:[scope_prefix]:[name], + // for example: "system.slice:docker:1234". + let mut parent = ""; + let scope; + let name; + if cgroups_path.starts_with("/youki") { + scope = "youki"; + name = cgroups_path + .strip_prefix("/youki/")? + .to_str() + .ok_or_else(|| anyhow!("Failed to parse cgroupsPath field."))?; + } else { + let parts = cgroups_path + .to_str() + .ok_or_else(|| anyhow!("Failed to parse cgroupsPath field."))? + .split(':') + .collect::>(); + parent = parts[0]; + scope = parts[1]; + name = parts[2]; + } + + // TODO: create the systemd unit using a dbus client. + + Ok(SystemDCGroupManager { + root_path, + cgroups_path: CgroupsPath { + parent: parent.to_owned(), + scope: scope.to_owned(), + name: name.to_owned(), + }, + }) + } + + /// get_unit_name returns the unit (scope) name from the path provided by the user + /// for example: foo:docker:bar returns in '/docker-bar.scope' + fn get_unit_name(&self) -> String { + // By default we create a scope unless specified explicitly. 
+ if !self.cgroups_path.name.ends_with(".slice") { + return format!( + "{}-{}.scope", + self.cgroups_path.scope, self.cgroups_path.name + ); + } + self.cgroups_path.name.clone() + } + + // systemd represents slice hierarchy using `-`, so we need to follow suit when + // generating the path of slice. For example, 'test-a-b.slice' becomes + // '/test.slice/test-a.slice/test-a-b.slice'. + fn expand_slice(&self, slice: &str) -> Result { + let suffix = ".slice"; + if slice.len() <= suffix.len() || !slice.ends_with(suffix) { + bail!("invalid slice name: {}", slice); + } + if slice.contains('/') { + bail!("invalid slice name: {}", slice); + } + let mut path = "".to_owned(); + let mut prefix = "".to_owned(); + let slice_name = slice.trim_end_matches(suffix); + // if input was -.slice, we should just return root now + if slice_name == "-" { + return Ok(Path::new("/").to_path_buf()); + } + for component in slice_name.split('-') { + if component.is_empty() { + anyhow!("Invalid slice name: {}", slice); + } + // Append the component to the path and to the prefix. + path = format!("{}/{}{}{}", path, prefix, component, suffix); + prefix = format!("{}{}-", prefix, component); + } + Ok(Path::new(&path).to_path_buf()) + } + + // get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath. + // an example of the final path: "/machine.slice/docker-foo.scope" + fn get_cgroups_path(&self) -> Result { + // the root slice is under 'machine.slice'. + let mut slice = Path::new("/machine.slice").to_path_buf(); + // if the user provided a '.slice' (as in a branch of a tree) + // we need to "unpack it". 
+ if !self.cgroups_path.parent.is_empty() { + slice = self.expand_slice(&self.cgroups_path.parent)?; + } + let unit_name = self.get_unit_name(); + let cgroups_path = slice.join(unit_name); + Ok(cgroups_path) + } + + /// create_unified_cgroup verifies sure that *each level* in the downward path from the root cgroup + /// down to the cgroup_path provided by the user is a valid cgroup hierarchy, + /// containing the attached controllers and that it contains the container pid. + fn create_unified_cgroup(&self, pid: Pid) -> Result { + let cgroups_path = self.get_cgroups_path()?; + let full_path = self.root_path.join_absolute_path(&cgroups_path)?; + let controllers: Vec = self + .get_available_controllers(&self.root_path)? + .into_iter() + .map(|c| format!("{}{}", "+", c.to_string())) + .collect(); + + // Write the controllers to the root_path. + Self::write_controllers(&self.root_path, &controllers)?; + + let mut current_path = self.root_path.clone(); + let mut components = cgroups_path.components().skip(1).peekable(); + // Verify that *each level* in the downward path from the root cgroup + // down to the cgroup_path provided by the user is a valid cgroup hierarchy. + // containing the attached controllers. 
+ while let Some(component) = components.next() { + current_path = current_path.join(component); + if !current_path.exists() { + fs::create_dir(&current_path)?; + fs::metadata(&current_path)?.permissions().set_mode(0o755); + } + + // last component cannot have subtree_control enabled due to internal process constraint + // if this were set, writing to the cgroups.procs file will fail with Erno 16 (device or resource busy) + if components.peek().is_some() { + Self::write_controllers(&current_path, &controllers)?; + } + } + + common::write_cgroup_file(full_path.join(CGROUP_PROCS), &pid)?; + Ok(full_path) + } + + fn get_available_controllers<P: AsRef<Path>>( + &self, + cgroups_path: P, + ) -> Result<Vec<ControllerType>> { + let controllers_path = self.root_path.join(cgroups_path).join(CGROUP_CONTROLLERS); + if !controllers_path.exists() { + bail!( + "cannot get available controllers. {:?} does not exist", + controllers_path + ) + } + + let mut controllers = Vec::new(); + for controller in fs::read_to_string(&controllers_path)?.split_whitespace() { + match controller { + "cpu" => controllers.push(ControllerType::Cpu), + "io" => controllers.push(ControllerType::Io), + "memory" => controllers.push(ControllerType::Memory), + "pids" => controllers.push(ControllerType::Pids), + _ => continue, + } + } + + Ok(controllers) + } + + fn write_controllers(path: &Path, controllers: &Vec<String>) -> Result<()> { + for controller in controllers { + common::write_cgroup_file_str(path.join(CGROUP_SUBTREE_CONTROL), controller)?; + } + + Ok(()) + } +} + +impl CgroupManager for SystemDCGroupManager { + fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { + // Dont attach any pid to the cgroup if -1 is specified as a pid + if pid.as_raw() == -1 { + return Ok(()); + } + let full_cgroup_path = self.create_unified_cgroup(pid)?; + + for controller in CONTROLLER_TYPES { + match controller { + ControllerType::Cpu => Cpu::apply(linux_resources, &full_cgroup_path)?, + ControllerType::CpuSet => CpuSet::apply(linux_resources, 
&full_cgroup_path)?, + ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &&full_cgroup_path)?, + ControllerType::Io => Io::apply(linux_resources, &&full_cgroup_path)?, + ControllerType::Memory => Memory::apply(linux_resources, &full_cgroup_path)?, + ControllerType::Pids => Pids::apply(linux_resources, &&full_cgroup_path)?, + } + } + + Ok(()) + } + + fn remove(&self) -> Result<()> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn expand_slice_works() -> Result<()> { + let manager = SystemDCGroupManager::new( + PathBuf::from("/sys/fs/cgroup"), + PathBuf::from("test-a-b.slice:docker:foo"), + )?; + + assert_eq!( + manager.expand_slice("test-a-b.slice")?, + PathBuf::from("/test.slice/test-a.slice/test-a-b.slice"), + ); + + Ok(()) + } + + #[test] + fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> { + let manager = SystemDCGroupManager::new( + PathBuf::from("/sys/fs/cgroup"), + PathBuf::from("test-a-b.slice:docker:foo"), + )?; + + assert_eq!( + manager.get_cgroups_path()?, + PathBuf::from("/test.slice/test-a.slice/test-a-b.slice/docker-foo.scope"), + ); + + Ok(()) + } + + #[test] + fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> { + let manager = SystemDCGroupManager::new( + PathBuf::from("/sys/fs/cgroup"), + PathBuf::from("machine.slice:libpod:foo"), + )?; + + assert_eq!( + manager.get_cgroups_path()?, + PathBuf::from("/machine.slice/libpod-foo.scope"), + ); + + Ok(()) + } + + #[test] + fn get_cgroups_path_works_with_scope() -> Result<()> { + let manager = SystemDCGroupManager::new( + PathBuf::from("/sys/fs/cgroup"), + PathBuf::from(":docker:foo"), + )?; + + assert_eq!( + manager.get_cgroups_path()?, + PathBuf::from("/machine.slice/docker-foo.scope"), + ); + + Ok(()) + } +} diff --git a/src/create.rs b/src/create.rs index b24a73700..90b941139 100644 --- a/src/create.rs +++ b/src/create.rs @@ -46,7 +46,12 @@ pub struct Create { // associated with it like any other process. 
impl Create { /// Starts a new container process - pub fn exec(&self, root_path: PathBuf, command: impl Command) -> Result<()> { + pub fn exec( + &self, + root_path: PathBuf, + systemd_cgroup: bool, + command: impl Command, + ) -> Result<()> { // create a directory for the container to store state etc. // if already present, return error let bundle_canonicalized = fs::canonicalize(&self.bundle) @@ -102,6 +107,7 @@ impl Create { rootfs, spec, csocketfd, + systemd_cgroup, container, command, )?; @@ -121,6 +127,7 @@ fn run_container>( rootfs: PathBuf, spec: oci_spec::Spec, csocketfd: Option, + systemd_cgroup: bool, container: Container, command: impl Command, ) -> Result { @@ -149,7 +156,7 @@ fn run_container>( }; let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id()); - let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path)?; + let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path, systemd_cgroup)?; // first fork, which creates process, which will later create actual container process match fork::fork_first(pid_file, rootless, linux, &container, cmanager)? { diff --git a/src/dbus/client.rs b/src/dbus/client.rs new file mode 100644 index 000000000..b0dc4afef --- /dev/null +++ b/src/dbus/client.rs @@ -0,0 +1,33 @@ +use anyhow::Result; +use dbus::blocking::Connection; +use std::time::Duration; +use std::vec::Vec; + +/// Client is a wrapper providing higher level API and abatraction around dbus. +/// For more information see https://www.freedesktop.org/wiki/Software/systemd/dbus/ +pub struct Client { + conn: Connection, +} + +impl Client { + pub fn new() -> Result { + let conn = Connection::new_session()?; + Ok(Client { conn }) + } + + /// start_unit starts a specific unit under systemd. See https://www.freedesktop.org/wiki/Software/systemd/dbus + /// for more details. 
+ pub fn start_unit(&self, unit_name: &str, _properties: Vec<&str>) -> Result<()> { + let proxy = self.conn.with_proxy( + "org.freedesktop.systemd1.Manager", + "/", + Duration::from_millis(5000), + ); + let (_job_id,): (i32,) = proxy.method_call( + "org.freedesktop.systemd1.Manager", + "StartTransientUnit", + (unit_name, "replace"), + )?; + Ok(()) + } +} diff --git a/src/dbus/mod.rs b/src/dbus/mod.rs new file mode 100644 index 000000000..e99ee79b9 --- /dev/null +++ b/src/dbus/mod.rs @@ -0,0 +1,2 @@ +mod client; +pub use client::Client; diff --git a/src/lib.rs b/src/lib.rs index 3bb33ce15..b8509f676 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,7 @@ pub mod cgroups; pub mod command; pub mod container; pub mod create; +pub mod dbus; pub mod info; pub mod logger; pub mod namespaces; diff --git a/src/main.rs b/src/main.rs index 4df5f55d2..6dd777ea1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,6 +33,9 @@ struct Opts { log: Option, #[clap(long)] log_format: Option, + /// Enable systemd cgroup manager, rather then use the cgroupfs directly. + #[clap(short, long)] + systemd_cgroup: bool, /// command to actually manage container #[clap(subcommand)] subcmd: SubCommand, @@ -47,6 +50,9 @@ pub struct Kill { #[derive(Clap, Debug)] pub struct Delete { container_id: String, + // forces deletion of the container. + #[clap(short, long)] + force: bool, } #[derive(Clap, Debug)] @@ -88,8 +94,10 @@ fn main() -> Result<()> { }; fs::create_dir_all(&root_path)?; + let systemd_cgroup = opts.systemd_cgroup; + match opts.subcmd { - SubCommand::Create(create) => create.exec(root_path, LinuxCommand), + SubCommand::Create(create) => create.exec(root_path, systemd_cgroup, LinuxCommand), SubCommand::Start(start) => start.exec(root_path), SubCommand::Kill(kill) => { // resolves relative paths, symbolic links etc. 
and get complete path @@ -151,7 +159,8 @@ fn main() -> Result<()> { // remove the cgroup created for the container // check https://man7.org/linux/man-pages/man7/cgroups.7.html // creating and removing cgroups section for more information on cgroups - let cmanager = cgroups::common::create_cgroup_manager(cgroups_path)?; + let cmanager = + cgroups::common::create_cgroup_manager(cgroups_path, systemd_cgroup)?; cmanager.remove()?; } std::process::exit(0) diff --git a/src/tty.rs b/src/tty.rs index 40407cc2d..10b5de92b 100644 --- a/src/tty.rs +++ b/src/tty.rs @@ -72,17 +72,16 @@ pub fn setup_console(console_fd: FileDescriptor) -> Result<()> { #[cfg(test)] mod tests { use super::*; - + use std::env; use std::fs::{self, File}; use std::os::unix::net::UnixListener; use std::path::PathBuf; use serial_test::serial; - + use crate::utils::{create_temp_dir, TempDir}; - fn setup(testname: &str) -> Result<(TempDir, PathBuf, PathBuf)> { let testdir = create_temp_dir(testname)?; let rundir_path = Path::join(&testdir, "run"); @@ -93,11 +92,10 @@ mod tests { Ok((testdir, rundir_path, socket_path)) } - #[test] #[serial] fn test_setup_console_socket() { - let init = setup("test_setup_console_socket"); + let init = setup("test_setup_console_socket"); assert!(init.is_ok()); let (testdir, rundir_path, socket_path) = init.unwrap(); let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); @@ -110,7 +108,7 @@ mod tests { #[test] #[serial] fn test_setup_console_socket_empty() { - let init = setup("test_setup_console_socket_empty"); + let init = setup("test_setup_console_socket_empty"); assert!(init.is_ok()); let (_testdir, rundir_path, socket_path) = init.unwrap(); let fd = setup_console_socket(&rundir_path, &socket_path); @@ -121,7 +119,7 @@ mod tests { #[test] #[serial] fn test_setup_console_socket_invalid() { - let init = setup("test_setup_console_socket_invalid"); + let init = setup("test_setup_console_socket_invalid"); assert!(init.is_ok()); let (testdir, rundir_path, 
socket_path) = init.unwrap(); let _socket = File::create(Path::join(&testdir, "console-socket")); @@ -133,7 +131,7 @@ mod tests { #[test] #[serial] fn test_setup_console() { - let init = setup("test_setup_console"); + let init = setup("test_setup_console"); assert!(init.is_ok()); let (testdir, rundir_path, socket_path) = init.unwrap(); let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); @@ -141,6 +139,5 @@ mod tests { let fd = setup_console_socket(&&rundir_path, &socket_path); let status = setup_console(fd.unwrap()); assert!(status.is_ok()); - } + } } - From 886ec5db36473013213e1a1d4a6f218ba537f4cd Mon Sep 17 00:00:00 2001 From: utam0k Date: Tue, 22 Jun 2021 21:52:39 +0900 Subject: [PATCH 23/70] update README.md --- README.md | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 3efcd974f..6f1014617 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,21 @@ Here is why I am rewriting a new container runtime in Rust. youki is not at the practical stage yet. However, it is getting closer to practical use, running with docker and passing all the default tests provided by [opencontainers/runtime-tools](https://github.com/opencontainers/runtime-tools). ![youki demo](docs/demo.gif) +## Features + +- [x] run with docker +- [ ] run with podman(WIP on [#24](https://github.com/containers/youki/issues/24)) +- [x] pivot root +- [x] mount devices +- [x] namespaces +- [x] capabilities +- [x] rlimits +- [ ] cgroups v1(WIP on [#9](https://github.com/containers/youki/issues/9)) +- [ ] cgroups v2(WIP on [#78](https://github.com/containers/youki/issues/78)) +- [ ] seccomp(WIP on [#25](https://github.com/containers/youki/issues/25)) +- [ ] hooks(WIP on [#13](https://github.com/containers/youki/issues/13)) +- [ ] rootless(WIP on [#77](https://github.com/containers/youki/issues/77)) + # Getting Started Local build is only supported on linux. 
@@ -41,11 +56,13 @@ For other platforms, please use the devcontainer that we prepared. - Docker(See [here](https://docs.docker.com/engine/install)) ## Dependencies + ```sh $ cargo install cargo-when ``` ### Debian, Ubuntu and related distributions + ```sh $ sudo dnf install \ pkg-config \ @@ -53,8 +70,8 @@ $ sudo dnf install \ libdbus-glib-1-dev ``` - ### Fedora, Centos, RHEL and related distributions + ```sh $ sudo dnf install \ pkg-config \ @@ -63,6 +80,7 @@ $ sudo dnf install \ ``` ## Build + ```sh $ git clone git@github.com:containers/youki.git $ cd youki @@ -130,21 +148,6 @@ We also have an active [Discord](https://discord.gg/h7R3HgWUct) if you'd like to TBD(WIP on [#14](https://github.com/containers/youki/issues/14)) -# Features - -- [x] run with docker -- [ ] run with podman -- [x] pivot root -- [x] mount devices -- [x] namespaces -- [x] capabilities -- [x] rlimits -- [ ] cgroups v1(WIP on [#9](https://github.com/containers/youki/issues/9)) -- [ ] cgroups v2 -- [ ] seccomp -- [ ] hooks(WIP on [#13](https://github.com/containers/youki/issues/13)) -- [ ] rootless - # Contribution This project welcomes your PR and issues. 
From 68ca2ae34ff9f93acf9cc0bd4153648f038327a6 Mon Sep 17 00:00:00 2001 From: Nimrod Shneor Date: Wed, 23 Jun 2021 13:46:38 +0300 Subject: [PATCH 24/70] Fix README.md Fedora & Centos instructions --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6f1014617..1d6d59fc0 100644 --- a/README.md +++ b/README.md @@ -73,9 +73,9 @@ $ sudo dnf install \ ### Fedora, Centos, RHEL and related distributions ```sh -$ sudo dnf install \ - pkg-config \ - systemd-dev \ +$ sudo dnf install \ + pkg-config \ + systemd-devel \ dbus-devel ``` From 9d785aa5215c1541bdf22b33d2a52ebde15fe636 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Wed, 23 Jun 2021 18:10:54 +0200 Subject: [PATCH 25/70] Add list command --- Cargo.lock | 11 +++++ Cargo.toml | 3 +- src/command/command.rs | 13 +++++- src/command/linux.rs | 55 +++++++++++++++++++++++- src/command/mod.rs | 2 +- src/command/test.rs | 6 ++- src/container/container.rs | 86 ++++++++++++++++++++++++++++---------- src/container/state.rs | 23 ++++++++++ src/create.rs | 5 ++- src/main.rs | 62 ++++++++++++++++++++++++++- src/namespaces.rs | 15 ++++--- src/process/fork.rs | 3 +- src/start.rs | 2 +- 13 files changed, 248 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c5b12f7d..ad0c867f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,6 +96,7 @@ dependencies = [ "libc", "num-integer", "num-traits", + "serde", "time", "winapi", ] @@ -868,6 +869,15 @@ dependencies = [ "utf8-cstr", ] +[[package]] +name = "tabwriter" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36205cfc997faadcc4b0b87aaef3fbedafe20d38d4959a7ca6ff803564051111" +dependencies = [ + "unicode-width", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -1013,4 +1023,5 @@ dependencies = [ "serde_json", "serial_test", "systemd", + "tabwriter", ] diff --git a/Cargo.toml b/Cargo.toml index afba5c345..d54821a2c 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,13 +17,14 @@ libc = "0.2.84" log = "0.4" anyhow = "1.0" mio = { version = "0.7", features = ["os-ext", "os-poll"] } -chrono = "0.4" +chrono = { version="0.4", features = ["serde"] } once_cell = "1.6.0" futures = { version = "0.3", features = ["thread-pool"] } regex = "1.5" oci_spec = { version = "0.1.0", path = "./oci_spec" } systemd = { version = "0.8", default-features = false } dbus = "0.9.2" +tabwriter = "1" [dev-dependencies] oci_spec = { version = "0.1.0", path = "./oci_spec", features = ["proptests"] } diff --git a/src/command/command.rs b/src/command/command.rs index 4ad6f3417..37fd66655 100644 --- a/src/command/command.rs +++ b/src/command/command.rs @@ -1,7 +1,7 @@ //! An interface trait so that rest of Youki can call //! necessary functions without having to worry about their //! implementation details -use std::{any::Any, path::Path}; +use std::{any::Any, ffi::OsStr, path::Path, sync::Arc}; use anyhow::Result; use caps::{errors::CapsError, CapSet, CapsHashSet}; @@ -12,6 +12,8 @@ use nix::{ use oci_spec::LinuxRlimit; +use crate::command::{linux::LinuxCommand, test::TestHelperCommand}; + /// This specifies various kernel/other functionalities required for /// container management pub trait Command { @@ -23,4 +25,13 @@ pub trait Command { fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<(), CapsError>; fn set_hostname(&self, hostname: &str) -> Result<()>; fn set_rlimit(&self, rlimit: &LinuxRlimit) -> Result<()>; + fn get_pwuid(&self, uid: u32) -> Option>; +} + +pub fn create_command() -> Box { + if cfg!(test) { + Box::new(TestHelperCommand::default()) + } else { + Box::new(LinuxCommand) + } } diff --git a/src/command/linux.rs b/src/command/linux.rs index 71282ea4c..f835be246 100644 --- a/src/command/linux.rs +++ b/src/command/linux.rs @@ -1,8 +1,12 @@ //! 
Implements Command trait for Linux systems -use std::{any::Any, path::Path}; +use std::ffi::{CStr, OsStr}; +use std::os::unix::ffi::OsStrExt; +use std::sync::Arc; +use std::{any::Any, mem, path::Path, ptr}; use anyhow::{bail, Result}; use caps::{errors::CapsError, CapSet, CapsHashSet}; +use libc::{c_char, uid_t}; use nix::{ errno::Errno, unistd::{fchdir, pivot_root, sethostname}, @@ -27,6 +31,21 @@ use crate::capabilities; #[derive(Clone)] pub struct LinuxCommand; +impl LinuxCommand { + unsafe fn from_raw_buf<'a, T>(p: *const c_char) -> T + where + T: From<&'a OsStr>, + { + T::from(OsStr::from_bytes(CStr::from_ptr(p).to_bytes())) + } + + /// Reads data from the `c_passwd` and returns it as a `User`. + unsafe fn passwd_to_user(passwd: libc::passwd) -> Arc { + let name: Arc = Self::from_raw_buf(passwd.pw_name); + name + } +} + impl Command for LinuxCommand { /// To enable dynamic typing, /// see https://doc.rust-lang.org/std/any/index.html for more information @@ -118,4 +137,38 @@ impl Command for LinuxCommand { } Ok(()) } + + // taken from https://crates.io/crates/users + fn get_pwuid(&self, uid: uid_t) -> Option> { + let mut passwd = unsafe { mem::zeroed::() }; + let mut buf = vec![0; 2048]; + let mut result = ptr::null_mut::(); + + loop { + let r = unsafe { + libc::getpwuid_r(uid, &mut passwd, buf.as_mut_ptr(), buf.len(), &mut result) + }; + + if r != libc::ERANGE { + break; + } + + let newsize = buf.len().checked_mul(2)?; + buf.resize(newsize, 0); + } + + if result.is_null() { + // There is no such user, or an error has occurred. + // errno gets set if there’s an error. + return None; + } + + if result != &mut passwd { + // The result of getpwuid_r should be its input passwd. + return None; + } + + let user = unsafe { Self::passwd_to_user(result.read()) }; + Some(user) + } } diff --git a/src/command/mod.rs b/src/command/mod.rs index fa2fc01fe..08e0ee64d 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -3,7 +3,7 @@ //! 
to call syscalls required for container management #[allow(clippy::module_inception)] -mod command; +pub mod command; pub mod linux; pub mod test; diff --git a/src/command/test.rs b/src/command/test.rs index fe5540d14..a80e71649 100644 --- a/src/command/test.rs +++ b/src/command/test.rs @@ -1,4 +1,4 @@ -use std::{any::Any, cell::RefCell}; +use std::{any::Any, cell::RefCell, ffi::OsStr, sync::Arc}; use caps::{errors::CapsError, CapSet, CapsHashSet}; use nix::sched::CloneFlags; @@ -60,6 +60,10 @@ impl Command for TestHelperCommand { fn set_rlimit(&self, _rlimit: &LinuxRlimit) -> anyhow::Result<()> { todo!() } + + fn get_pwuid(&self, _: u32) -> Option> { + todo!() + } } impl TestHelperCommand { diff --git a/src/container/container.rs b/src/container/container.rs index 378d30096..b05017f1c 100644 --- a/src/container/container.rs +++ b/src/container/container.rs @@ -1,10 +1,16 @@ +use std::ffi::OsString; use std::fs; use std::path::{Path, PathBuf}; use anyhow::Result; +use chrono::DateTime; use nix::unistd::Pid; + +use chrono::Utc; use procfs::process::Process; +use crate::command::command::create_command; + use crate::container::{ContainerStatus, State}; /// Structure representing the container data @@ -39,7 +45,7 @@ impl Container { pub fn status(&self) -> ContainerStatus { self.state.status } - pub fn refresh_status(&self) -> Result { + pub fn refresh_status(&mut self) -> Result { let new_status = match self.pid() { Some(pid) => { // Note that Process::new does not spawn a new process @@ -60,11 +66,19 @@ impl Container { } None => ContainerStatus::Stopped, }; - self.update_status(new_status) + Ok(self.update_status(new_status)) + } + + pub fn refresh_state(&self) -> Result { + let state = State::load(&self.root)?; + Ok(Self { + state, + root: self.root.clone(), + }) } pub fn save(&self) -> Result<()> { - log::debug!("Sava container status: {:?} in {:?}", self, self.root); + log::debug!("Save container status: {:?} in {:?}", self, self.root); 
self.state.save(&self.root) } @@ -85,24 +99,50 @@ impl Container { } pub fn set_pid(&self, pid: i32) -> Self { - Self::new( - self.state.id.as_str(), - self.state.status, - Some(pid), - self.state.bundle.as_str(), - &self.root, - ) - .expect("unexpected error") - } - - pub fn update_status(&self, status: ContainerStatus) -> Result { - Self::new( - self.state.id.as_str(), - status, - self.state.pid, - self.state.bundle.as_str(), - &self.root, - ) + let mut new_state = self.state.clone(); + new_state.pid = Some(pid); + + Self { + state: new_state, + root: self.root.clone(), + } + } + + pub fn created(&self) -> Option> { + self.state.created + } + + pub fn set_creator(mut self, uid: u32) -> Self { + self.state.creator = Some(uid); + self + } + + pub fn creator(&self) -> Option { + if let Some(uid) = self.state.creator { + let command = create_command(); + let user_name = command.get_pwuid(uid); + if let Some(user_name) = user_name { + return Some((&*user_name).to_owned()); + } + } + + None + } + + pub fn update_status(&self, status: ContainerStatus) -> Self { + let created = match (status, self.state.created) { + (ContainerStatus::Created, None) => Some(Utc::now()), + _ => self.state.created, + }; + + let mut new_state = self.state.clone(); + new_state.created = created; + new_state.status = status; + + Self { + state: new_state, + root: self.root.clone(), + } } pub fn load(container_root: PathBuf) -> Result { @@ -112,4 +152,8 @@ impl Container { root: container_root, }) } + + pub fn bundle(&self) -> String { + self.state.bundle.clone() + } } diff --git a/src/container/state.rs b/src/container/state.rs index 30964c855..49bfa4274 100644 --- a/src/container/state.rs +++ b/src/container/state.rs @@ -1,9 +1,11 @@ //! 
Information about status and state of the container use std::collections::HashMap; +use std::fmt::Display; use std::fs; use std::{fs::File, path::Path}; use anyhow::Result; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; const STATE_FILE_PATH: &str = "state.json"; @@ -40,6 +42,19 @@ impl ContainerStatus { } } +impl Display for ContainerStatus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let print = match *self { + Self::Creating => "Creating", + Self::Created => "Created", + Self::Running => "Running", + Self::Stopped => "Stopped", + }; + + write!(f, "{}", print) + } +} + /// Stores the state information of the container #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "camelCase")] @@ -57,6 +72,12 @@ pub struct State { pub bundle: String, // Annotations are key values associated with the container. pub annotations: HashMap, + // Creation time of the container + #[serde(skip_serializing_if = "Option::is_none")] + pub created: Option>, + // User that created the container + #[serde(skip_serializing_if = "Option::is_none")] + pub creator: Option, } impl State { @@ -73,6 +94,8 @@ impl State { pid, bundle: bundle.to_string(), annotations: HashMap::default(), + created: None, + creator: None, } } diff --git a/src/create.rs b/src/create.rs index 90b941139..bf732880d 100644 --- a/src/create.rs +++ b/src/create.rs @@ -196,7 +196,10 @@ fn run_container>( // actually run the command / program to be run in container utils::do_exec(&spec_args[0], spec_args, envs)?; // the command / program is done executing - container.update_status(ContainerStatus::Stopped)?.save()?; + container + .refresh_state()? + .update_status(ContainerStatus::Stopped) + .save()?; Ok(Process::Init(init)) } diff --git a/src/main.rs b/src/main.rs index 6dd777ea1..5acec9290 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,14 +2,21 @@ //! Container Runtime written in Rust, inspired by [railcar](https://github.com/oracle/railcar) //! 
This crate provides a container runtime which can be used by a high-level container runtime to run containers. +use std::ffi::OsString; + use std::fs; +use std::io; +use std::io::Write; + use std::path::{Path, PathBuf}; use anyhow::{bail, Result}; +use chrono::{DateTime, Local}; use clap::Clap; use nix::sys::signal as nix_signal; use youki::command::linux::LinuxCommand; + use youki::container::{Container, ContainerStatus}; use youki::create; use youki::info::{print_cgroups, print_hardware, print_kernel, print_os, print_youki}; @@ -17,6 +24,7 @@ use youki::rootless::should_use_rootless; use youki::signal; use youki::start; +use tabwriter::TabWriter; use youki::cgroups; use youki::utils; @@ -76,6 +84,8 @@ enum SubCommand { State(StateArgs), #[clap(version = "0.0.1", author = "utam0k ")] Info, + #[clap(version = "0.0.1", author = "utam0k ")] + List, } /// This is the entry point in the container runtime. The binary is run by a high-level container runtime, @@ -116,7 +126,7 @@ fn main() -> Result<()> { let sig = signal::from_str(kill.signal.as_str())?; log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); nix_signal::kill(container.pid().unwrap(), sig)?; - container.update_status(ContainerStatus::Stopped)?.save()?; + container.update_status(ContainerStatus::Stopped).save()?; std::process::exit(0) } else { bail!( @@ -189,5 +199,55 @@ fn main() -> Result<()> { Ok(()) } + + SubCommand::List => { + let root_path = fs::canonicalize(root_path)?; + let mut content = String::new(); + + for container_dir in fs::read_dir(root_path)? 
{ + let container_dir = container_dir?.path(); + let state_file = container_dir.join("state.json"); + if !state_file.exists() { + continue; + } + + let container = Container::load(container_dir)?.refresh_status()?; + let pid = if let Some(pid) = container.pid() { + pid.to_string() + } else { + "".to_owned() + }; + + let user_name = if let Some(creator) = container.creator() { + creator + } else { + OsString::new() + }; + + let created = if let Some(utc) = container.created() { + let local: DateTime = DateTime::from(utc); + local.to_rfc3339_opts(chrono::SecondsFormat::Secs, false) + } else { + "".to_owned() + }; + + content.push_str(&format!( + "{}\t{}\t{}\t{}\t{}\t{}\n", + container.id(), + pid, + container.status(), + container.bundle(), + created, + user_name.to_string_lossy() + )); + } + + let mut tab_writer = TabWriter::new(io::stdout()); + writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tCREATOR")?; + write!(&mut tab_writer, "{}", content)?; + tab_writer.flush()?; + + Ok(()) + } } } diff --git a/src/namespaces.rs b/src/namespaces.rs index 4fffea80e..6389dec9f 100644 --- a/src/namespaces.rs +++ b/src/namespaces.rs @@ -15,8 +15,8 @@ use nix::{ unistd::{self, Gid, Uid}, }; -use crate::command::{linux::LinuxCommand, test::TestHelperCommand, Command}; -use oci_spec::{LinuxNamespace, LinuxNamespaceType}; +use crate::command::{command::create_command, Command}; +use oci_spec::LinuxNamespace; pub struct Namespaces { spaces: Vec, @@ -33,11 +33,7 @@ impl From> for Namespaces { cf }, ); - let command: Box = if cfg!(test) { - Box::new(TestHelperCommand::default()) - } else { - Box::new(LinuxCommand) - }; + let command: Box = create_command(); Namespaces { spaces: namespaces, @@ -80,10 +76,13 @@ impl Namespaces { } } +#[cfg(test)] mod tests { + use oci_spec::LinuxNamespaceType; + use super::*; + use crate::command::test::TestHelperCommand; - #[allow(dead_code)] fn gen_sample_linux_namespaces() -> Vec { vec![ LinuxNamespace { diff --git a/src/process/fork.rs 
b/src/process/fork.rs index 8a7a25198..211d59915 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -74,7 +74,8 @@ pub fn fork_first>( // update status and pid of the container process container - .update_status(ContainerStatus::Created)? + .update_status(ContainerStatus::Created) + .set_creator(nix::unistd::geteuid().as_raw()) .set_pid(init_pid) .save()?; // if file to write the pid to is specified, write pid of the child diff --git a/src/start.rs b/src/start.rs index bad12a7b4..d37f95cbc 100644 --- a/src/start.rs +++ b/src/start.rs @@ -36,7 +36,7 @@ impl Start { let mut notify_socket = NotifySocket::new(&container.root)?; notify_socket.notify_container_start()?; - container.update_status(ContainerStatus::Running)?.save()?; + container.update_status(ContainerStatus::Running).save()?; Ok(()) } } From cd383904cfedad574ef2d163fe84a5b7a261dc43 Mon Sep 17 00:00:00 2001 From: utam0k Date: Sat, 26 Jun 2021 17:01:42 +0900 Subject: [PATCH 26/70] split the subcommands into their own files. 
--- src/cgroups/v2/systemd_manager.rs | 2 +- src/delete.rs | 67 ++++++++++ src/info.rs | 20 ++- src/kill.rs | 46 +++++++ src/lib.rs | 4 + src/list.rs | 67 ++++++++++ src/main.rs | 198 +++--------------------------- src/state.rs | 22 ++++ 8 files changed, 243 insertions(+), 183 deletions(-) create mode 100644 src/delete.rs create mode 100644 src/kill.rs create mode 100644 src/list.rs create mode 100644 src/state.rs diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs index 6e01e3050..a21b65bf6 100644 --- a/src/cgroups/v2/systemd_manager.rs +++ b/src/cgroups/v2/systemd_manager.rs @@ -202,7 +202,7 @@ impl SystemDCGroupManager { Ok(controllers) } - fn write_controllers(path: &Path, controllers: &Vec) -> Result<()> { + fn write_controllers(path: &Path, controllers: &[String]) -> Result<()> { for controller in controllers { common::write_cgroup_file_str(path.join(CGROUP_SUBTREE_CONTROL), controller)?; } diff --git a/src/delete.rs b/src/delete.rs new file mode 100644 index 000000000..7b535bace --- /dev/null +++ b/src/delete.rs @@ -0,0 +1,67 @@ +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::{bail, Result}; +use clap::Clap; + +use crate::cgroups; +use crate::container::Container; +use crate::utils; + +#[derive(Clap, Debug)] +pub struct Delete { + container_id: String, + // forces deletion of the container. + #[clap(short, long)] + force: bool, +} + +impl Delete { + pub fn exec(&self, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { + log::debug!("start deleting {}", self.container_id); + // state of container is stored in a directory named as container id inside + // root directory given in commandline options + let container_root = root_path.join(&self.container_id); + if !container_root.exists() { + bail!("{} doesn't exist.", self.container_id) + } + // load container state from json file, and check status of the container + // it might be possible that delete is invoked on a running container. 
+ log::debug!("load the container from {:?}", container_root); + let container = Container::load(container_root)?.refresh_status()?; + if container.can_delete() { + if container.root.exists() { + nix::unistd::chdir(&PathBuf::from(&container.state.bundle))?; + let config_absolute_path = &PathBuf::from(&container.state.bundle) + .join(Path::new("config.json")) + .to_string_lossy() + .to_string(); + log::debug!("load spec from {:?}", config_absolute_path); + let spec = oci_spec::Spec::load(config_absolute_path)?; + log::debug!("spec: {:?}", spec); + + // remove the directory storing container state + log::debug!("remove dir {:?}", container.root); + fs::remove_dir_all(&container.root)?; + + let cgroups_path = + utils::get_cgroup_path(&spec.linux.unwrap().cgroups_path, container.id()); + + // remove the cgroup created for the container + // check https://man7.org/linux/man-pages/man7/cgroups.7.html + // creating and removing cgroups section for more information on cgroups + let cmanager = + cgroups::common::create_cgroup_manager(cgroups_path, systemd_cgroup)?; + cmanager.remove()?; + } + std::process::exit(0) + } else { + bail!( + "{} could not be deleted because it was {:?}", + container.id(), + container.status() + ) + } + } +} diff --git a/src/info.rs b/src/info.rs index b726e25d5..087b7bbee 100644 --- a/src/info.rs +++ b/src/info.rs @@ -1,8 +1,26 @@ -use procfs::{CpuInfo, Meminfo}; use std::{fs, path::Path}; +use anyhow::Result; +use clap::Clap; +use procfs::{CpuInfo, Meminfo}; + use crate::cgroups; +#[derive(Clap, Debug)] +pub struct Info {} + +impl Info { + pub fn exec(&self) -> Result<()> { + print_youki(); + print_kernel(); + print_os(); + print_hardware(); + print_cgroups(); + + Ok(()) + } +} + pub fn print_youki() { println!("{:<18}{}", "Version", env!("CARGO_PKG_VERSION")); } diff --git a/src/kill.rs b/src/kill.rs new file mode 100644 index 000000000..9c0eebb2d --- /dev/null +++ b/src/kill.rs @@ -0,0 +1,46 @@ +use std::{fs, path::PathBuf}; + +use 
anyhow::{bail, Result}; +use clap::Clap; +use nix::sys::signal as nix_signal; + +use crate::{ + container::{Container, ContainerStatus}, + signal, +}; + +#[derive(Clap, Debug)] +pub struct Kill { + container_id: String, + signal: String, +} + +impl Kill { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + // resolves relative paths, symbolic links etc. and get complete path + let root_path = fs::canonicalize(root_path)?; + // state of container is stored in a directory named as container id inside + // root directory given in commandline options + let container_root = root_path.join(&self.container_id); + if !container_root.exists() { + bail!("{} doesn't exist.", self.container_id) + } + + // load container state from json file, and check status of the container + // it might be possible that kill is invoked on a already stopped container etc. + let container = Container::load(container_root)?.refresh_status()?; + if container.can_kill() { + let sig = signal::from_str(self.signal.as_str())?; + log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); + nix_signal::kill(container.pid().unwrap(), sig)?; + container.update_status(ContainerStatus::Stopped).save()?; + std::process::exit(0) + } else { + bail!( + "{} could not be killed because it was {:?}", + container.id(), + container.status() + ) + } + } +} diff --git a/src/lib.rs b/src/lib.rs index b8509f676..da3c23572 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,10 @@ pub mod command; pub mod container; pub mod create; pub mod dbus; +pub mod delete; pub mod info; +pub mod kill; +pub mod list; pub mod logger; pub mod namespaces; pub mod notify_socket; @@ -18,6 +21,7 @@ pub mod rootfs; pub mod rootless; pub mod signal; pub mod start; +pub mod state; pub mod stdio; pub mod tty; pub mod utils; diff --git a/src/list.rs b/src/list.rs new file mode 100644 index 000000000..e99fd0245 --- /dev/null +++ b/src/list.rs @@ -0,0 +1,67 @@ +use std::ffi::OsString; +use std::fs; +use std::io; +use 
std::io::Write; +use std::path::PathBuf; + +use anyhow::Result; +use chrono::{DateTime, Local}; +use clap::Clap; +use tabwriter::TabWriter; + +use crate::container::Container; + +#[derive(Clap, Debug)] +pub struct List {} + +impl List { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let root_path = fs::canonicalize(root_path)?; + let mut content = String::new(); + + for container_dir in fs::read_dir(root_path)? { + let container_dir = container_dir?.path(); + let state_file = container_dir.join("state.json"); + if !state_file.exists() { + continue; + } + + let container = Container::load(container_dir)?.refresh_status()?; + let pid = if let Some(pid) = container.pid() { + pid.to_string() + } else { + "".to_owned() + }; + + let user_name = if let Some(creator) = container.creator() { + creator + } else { + OsString::new() + }; + + let created = if let Some(utc) = container.created() { + let local: DateTime = DateTime::from(utc); + local.to_rfc3339_opts(chrono::SecondsFormat::Secs, false) + } else { + "".to_owned() + }; + + content.push_str(&format!( + "{}\t{}\t{}\t{}\t{}\t{}\n", + container.id(), + pid, + container.status(), + container.bundle(), + created, + user_name.to_string_lossy() + )); + } + + let mut tab_writer = TabWriter::new(io::stdout()); + writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tCREATOR")?; + write!(&mut tab_writer, "{}", content)?; + tab_writer.flush()?; + + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 5acec9290..b244d09f5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,31 +2,21 @@ //! Container Runtime written in Rust, inspired by [railcar](https://github.com/oracle/railcar) //! This crate provides a container runtime which can be used by a high-level container runtime to run containers. 
-use std::ffi::OsString; - use std::fs; -use std::io; -use std::io::Write; - -use std::path::{Path, PathBuf}; +use std::path::PathBuf; -use anyhow::{bail, Result}; -use chrono::{DateTime, Local}; +use anyhow::Result; use clap::Clap; -use nix::sys::signal as nix_signal; use youki::command::linux::LinuxCommand; - -use youki::container::{Container, ContainerStatus}; use youki::create; -use youki::info::{print_cgroups, print_hardware, print_kernel, print_os, print_youki}; +use youki::delete; +use youki::info; +use youki::kill; +use youki::list; use youki::rootless::should_use_rootless; -use youki::signal; use youki::start; - -use tabwriter::TabWriter; -use youki::cgroups; -use youki::utils; +use youki::state; /// High-level commandline option definition /// This takes global options as well as individual commands as specified in [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) @@ -49,25 +39,6 @@ struct Opts { subcmd: SubCommand, } -#[derive(Clap, Debug)] -pub struct Kill { - container_id: String, - signal: String, -} - -#[derive(Clap, Debug)] -pub struct Delete { - container_id: String, - // forces deletion of the container. 
- #[clap(short, long)] - force: bool, -} - -#[derive(Clap, Debug)] -pub struct StateArgs { - pub container_id: String, -} - /// Subcommands accepted by Youki, confirming with [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) /// Also for a short information, check [runc commandline documentation](https://github.com/opencontainers/runc/blob/master/man/runc.8.md) #[derive(Clap, Debug)] @@ -77,15 +48,15 @@ enum SubCommand { #[clap(version = "0.0.1", author = "utam0k ")] Start(start::Start), #[clap(version = "0.0.1", author = "utam0k ")] - Kill(Kill), + Kill(kill::Kill), #[clap(version = "0.0.1", author = "utam0k ")] - Delete(Delete), + Delete(delete::Delete), #[clap(version = "0.0.1", author = "utam0k ")] - State(StateArgs), + State(state::State), #[clap(version = "0.0.1", author = "utam0k ")] - Info, + Info(info::Info), #[clap(version = "0.0.1", author = "utam0k ")] - List, + List(list::List), } /// This is the entry point in the container runtime. The binary is run by a high-level container runtime, @@ -109,145 +80,10 @@ fn main() -> Result<()> { match opts.subcmd { SubCommand::Create(create) => create.exec(root_path, systemd_cgroup, LinuxCommand), SubCommand::Start(start) => start.exec(root_path), - SubCommand::Kill(kill) => { - // resolves relative paths, symbolic links etc. and get complete path - let root_path = fs::canonicalize(root_path)?; - // state of container is stored in a directory named as container id inside - // root directory given in commandline options - let container_root = root_path.join(&kill.container_id); - if !container_root.exists() { - bail!("{} doesn't exist.", kill.container_id) - } - - // load container state from json file, and check status of the container - // it might be possible that kill is invoked on a already stopped container etc. 
- let container = Container::load(container_root)?.refresh_status()?; - if container.can_kill() { - let sig = signal::from_str(kill.signal.as_str())?; - log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); - nix_signal::kill(container.pid().unwrap(), sig)?; - container.update_status(ContainerStatus::Stopped).save()?; - std::process::exit(0) - } else { - bail!( - "{} could not be killed because it was {:?}", - container.id(), - container.status() - ) - } - } - SubCommand::Delete(delete) => { - log::debug!("start deleting {}", delete.container_id); - // state of container is stored in a directory named as container id inside - // root directory given in commandline options - let container_root = root_path.join(&delete.container_id); - if !container_root.exists() { - bail!("{} doesn't exist.", delete.container_id) - } - // load container state from json file, and check status of the container - // it might be possible that delete is invoked on a running container. - log::debug!("load the container from {:?}", container_root); - let container = Container::load(container_root)?.refresh_status()?; - if container.can_delete() { - if container.root.exists() { - nix::unistd::chdir(&PathBuf::from(&container.state.bundle))?; - let config_absolute_path = &PathBuf::from(&container.state.bundle) - .join(Path::new("config.json")) - .to_string_lossy() - .to_string(); - log::debug!("load spec from {:?}", config_absolute_path); - let spec = oci_spec::Spec::load(config_absolute_path)?; - log::debug!("spec: {:?}", spec); - - // remove the directory storing container state - log::debug!("remove dir {:?}", container.root); - fs::remove_dir_all(&container.root)?; - - let cgroups_path = - utils::get_cgroup_path(&spec.linux.unwrap().cgroups_path, container.id()); - - // remove the cgroup created for the container - // check https://man7.org/linux/man-pages/man7/cgroups.7.html - // creating and removing cgroups section for more information on cgroups - let cmanager = - 
cgroups::common::create_cgroup_manager(cgroups_path, systemd_cgroup)?; - cmanager.remove()?; - } - std::process::exit(0) - } else { - bail!( - "{} could not be deleted because it was {:?}", - container.id(), - container.status() - ) - } - } - SubCommand::State(state_args) => { - let root_path = fs::canonicalize(root_path)?; - let container_root = root_path.join(state_args.container_id); - let container = Container::load(container_root)?.refresh_status()?; - println!("{}", serde_json::to_string_pretty(&container.state)?); - std::process::exit(0); - } - - SubCommand::Info => { - print_youki(); - print_kernel(); - print_os(); - print_hardware(); - print_cgroups(); - - Ok(()) - } - - SubCommand::List => { - let root_path = fs::canonicalize(root_path)?; - let mut content = String::new(); - - for container_dir in fs::read_dir(root_path)? { - let container_dir = container_dir?.path(); - let state_file = container_dir.join("state.json"); - if !state_file.exists() { - continue; - } - - let container = Container::load(container_dir)?.refresh_status()?; - let pid = if let Some(pid) = container.pid() { - pid.to_string() - } else { - "".to_owned() - }; - - let user_name = if let Some(creator) = container.creator() { - creator - } else { - OsString::new() - }; - - let created = if let Some(utc) = container.created() { - let local: DateTime = DateTime::from(utc); - local.to_rfc3339_opts(chrono::SecondsFormat::Secs, false) - } else { - "".to_owned() - }; - - content.push_str(&format!( - "{}\t{}\t{}\t{}\t{}\t{}\n", - container.id(), - pid, - container.status(), - container.bundle(), - created, - user_name.to_string_lossy() - )); - } - - let mut tab_writer = TabWriter::new(io::stdout()); - writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tCREATOR")?; - write!(&mut tab_writer, "{}", content)?; - tab_writer.flush()?; - - Ok(()) - } + SubCommand::Kill(kill) => kill.exec(root_path), + SubCommand::Delete(delete) => delete.exec(root_path, systemd_cgroup), + 
SubCommand::State(state) => state.exec(root_path), + SubCommand::Info(info) => info.exec(), + SubCommand::List(list) => list.exec(root_path), } } diff --git a/src/state.rs b/src/state.rs new file mode 100644 index 000000000..7be62193c --- /dev/null +++ b/src/state.rs @@ -0,0 +1,22 @@ +use std::fs; +use std::path::PathBuf; + +use anyhow::Result; +use clap::Clap; + +use crate::container::Container; + +#[derive(Clap, Debug)] +pub struct State { + pub container_id: String, +} + +impl State { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let root_path = fs::canonicalize(root_path)?; + let container_root = root_path.join(&self.container_id); + let container = Container::load(container_root)?.refresh_status()?; + println!("{}", serde_json::to_string_pretty(&container.state)?); + std::process::exit(0); + } +} From b43aae070b8bf851c4c21ce4fce59be58ece4a7b Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 26 Jun 2021 01:26:24 +0200 Subject: [PATCH 27/70] Seperate adding tasks and applying resource restrictions --- src/cgroups/common.rs | 3 +- src/cgroups/v1/blkio.rs | 25 ++---- src/cgroups/v1/controller.rs | 12 ++- src/cgroups/v1/controller_type.rs | 1 + src/cgroups/v1/cpu.rs | 10 +-- src/cgroups/v1/cpuacct.rs | 22 +++--- src/cgroups/v1/cpuset.rs | 14 +++- src/cgroups/v1/devices.rs | 9 +-- src/cgroups/v1/freezer.rs | 43 +++++----- src/cgroups/v1/hugetlb.rs | 23 ++---- src/cgroups/v1/manager.rs | 62 ++++++++++----- src/cgroups/v1/memory.rs | 23 ++---- src/cgroups/v1/network_classifier.rs | 8 +- src/cgroups/v1/network_priority.rs | 8 +- src/cgroups/v1/pids.rs | 13 +-- src/cgroups/v2/manager.rs | 48 ++++++------ src/cgroups/v2/systemd_manager.rs | 113 +++++++++++++-------------- src/process/fork.rs | 3 +- 18 files changed, 214 insertions(+), 226 deletions(-) diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index 26f626805..84a6dc148 100644 --- a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -19,7 +19,8 @@ 
pub const CGROUP_PROCS: &str = "cgroup.procs"; pub const DEFAULT_CGROUP_ROOT: &str = "/sys/fs/cgroup"; pub trait CgroupManager { - fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()>; + fn add_task(&self, pid: Pid) -> Result<()>; + fn apply(&self, linux_resources: &LinuxResources) -> Result<()>; fn remove(&self) -> Result<()>; } diff --git a/src/cgroups/v1/blkio.rs b/src/cgroups/v1/blkio.rs index 2801eb2b7..d80dc36af 100644 --- a/src/cgroups/v1/blkio.rs +++ b/src/cgroups/v1/blkio.rs @@ -1,12 +1,7 @@ -use std::{ - fs::{self}, - path::Path, -}; - -use crate::cgroups::{ - common::{self, CGROUP_PROCS}, - v1::Controller, -}; +use std::path::Path; + +use crate::cgroups::{common, v1::Controller}; +use anyhow::Result; use oci_spec::{LinuxBlockIo, LinuxResources}; const CGROUP_BLKIO_THROTTLE_READ_BPS: &str = "blkio.throttle.read_bps_device"; @@ -17,25 +12,19 @@ const CGROUP_BLKIO_THROTTLE_WRITE_IOPS: &str = "blkio.throttle.write_iops_device pub struct Blkio {} impl Controller for Blkio { - fn apply( - linux_resources: &LinuxResources, - cgroup_root: &Path, - pid: nix::unistd::Pid, - ) -> anyhow::Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply blkio cgroup config"); - fs::create_dir_all(cgroup_root)?; if let Some(blkio) = &linux_resources.block_io { Self::apply(cgroup_root, blkio)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } impl Blkio { - fn apply(root_path: &Path, blkio: &LinuxBlockIo) -> anyhow::Result<()> { + fn apply(root_path: &Path, blkio: &LinuxBlockIo) -> Result<()> { for trbd in &blkio.blkio_throttle_read_bps_device { common::write_cgroup_file_str( &root_path.join(CGROUP_BLKIO_THROTTLE_READ_BPS), @@ -70,6 +59,8 @@ impl Blkio { #[cfg(test)] mod tests { + use std::fs; + use super::*; use crate::cgroups::test::setup; use oci_spec::{LinuxBlockIo, LinuxThrottleDevice}; diff --git a/src/cgroups/v1/controller.rs b/src/cgroups/v1/controller.rs index 
84e0b3cc2..408d357da 100644 --- a/src/cgroups/v1/controller.rs +++ b/src/cgroups/v1/controller.rs @@ -1,10 +1,18 @@ -use std::path::Path; +use std::{fs, path::Path}; use anyhow::Result; use nix::unistd::Pid; use oci_spec::LinuxResources; +use crate::cgroups::common::{self, CGROUP_PROCS}; + pub trait Controller { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()>; + fn add_task(pid: Pid, cgroup_path: &Path) -> Result<()> { + fs::create_dir_all(cgroup_path)?; + common::write_cgroup_file(cgroup_path.join(CGROUP_PROCS), pid)?; + Ok(()) + } + + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()>; } diff --git a/src/cgroups/v1/controller_type.rs b/src/cgroups/v1/controller_type.rs index 449c1bf41..6fbf0f37f 100644 --- a/src/cgroups/v1/controller_type.rs +++ b/src/cgroups/v1/controller_type.rs @@ -1,5 +1,6 @@ use std::fmt::Display; +#[derive(Hash, PartialEq, Eq, Debug, Clone)] pub enum ControllerType { Cpu, CpuAcct, diff --git a/src/cgroups/v1/cpu.rs b/src/cgroups/v1/cpu.rs index 50a7c7eb5..856d90a0b 100644 --- a/src/cgroups/v1/cpu.rs +++ b/src/cgroups/v1/cpu.rs @@ -1,10 +1,9 @@ -use std::{fs, path::Path}; +use std::path::Path; use anyhow::Result; -use nix::unistd::Pid; use oci_spec::{LinuxCpu, LinuxResources}; -use crate::cgroups::common::{self, CGROUP_PROCS}; +use crate::cgroups::common; use super::Controller; @@ -17,14 +16,13 @@ const CGROUP_CPU_RT_PERIOD: &str = "cpu.rt_period_us"; pub struct Cpu {} impl Controller for Cpu { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Cpu cgroup config"); - fs::create_dir_all(cgroup_root)?; + if let Some(cpu) = &linux_resources.cpu { Self::apply(cgroup_root, cpu)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v1/cpuacct.rs b/src/cgroups/v1/cpuacct.rs index 
947599cb2..889f905d0 100644 --- a/src/cgroups/v1/cpuacct.rs +++ b/src/cgroups/v1/cpuacct.rs @@ -1,37 +1,33 @@ -use std::{fs, path::Path}; +use std::path::Path; use anyhow::Result; -use nix::unistd::Pid; use oci_spec::LinuxResources; -use crate::cgroups::common::{self, CGROUP_PROCS}; - use super::Controller; pub struct CpuAcct {} impl Controller for CpuAcct { - fn apply(_linux_resources: &LinuxResources, cgroup_path: &Path, pid: Pid) -> Result<()> { - log::debug!("Apply cpuacct cgroup config"); - fs::create_dir_all(cgroup_path)?; - - common::write_cgroup_file(cgroup_path.join(CGROUP_PROCS), pid)?; + fn apply(_linux_resources: &LinuxResources, _cgroup_path: &Path) -> Result<()> { Ok(()) } } #[cfg(test)] mod tests { + use std::fs; + + use nix::unistd::Pid; + use super::*; - use crate::cgroups::test::setup; + use crate::cgroups::{common::CGROUP_PROCS, test::setup}; #[test] - fn test_apply() { + fn test_add_task() { let (tmp, procs) = setup("test_cpuacct_apply", CGROUP_PROCS); - let resource = LinuxResources::default(); let pid = Pid::from_raw(1000); - CpuAcct::apply(&resource, &tmp, pid).expect("apply cpuacct"); + CpuAcct::add_task(pid, &tmp).expect("apply cpuacct"); let content = fs::read_to_string(&procs) .unwrap_or_else(|_| panic!("read {} file content", CGROUP_PROCS)); diff --git a/src/cgroups/v1/cpuset.rs b/src/cgroups/v1/cpuset.rs index c08833c59..e232120c3 100644 --- a/src/cgroups/v1/cpuset.rs +++ b/src/cgroups/v1/cpuset.rs @@ -1,8 +1,9 @@ use std::{fs, path::Path}; use anyhow::{bail, Result}; -use nix::unistd::Pid; +use nix::unistd; use oci_spec::{LinuxCpu, LinuxResources}; +use unistd::Pid; use crate::cgroups::common::{self, CGROUP_PROCS}; @@ -14,18 +15,23 @@ const CGROUP_CPUSET_MEMS: &str = "cpuset.mems"; pub struct CpuSet {} impl Controller for CpuSet { - fn apply(linux_resources: &LinuxResources, cgroup_path: &Path, pid: Pid) -> Result<()> { - log::debug!("Apply CpuSet cgroup config"); + fn add_task(pid: Pid, cgroup_path: &Path) -> Result<()> { 
fs::create_dir_all(cgroup_path)?; Self::ensure_not_empty(cgroup_path, CGROUP_CPUSET_CPUS)?; Self::ensure_not_empty(cgroup_path, CGROUP_CPUSET_MEMS)?; + common::write_cgroup_file(cgroup_path.join(CGROUP_PROCS), pid)?; + Ok(()) + } + + fn apply(linux_resources: &LinuxResources, cgroup_path: &Path) -> Result<()> { + log::debug!("Apply CpuSet cgroup config"); + if let Some(cpuset) = &linux_resources.cpu { Self::apply(cgroup_path, cpuset)?; } - common::write_cgroup_file(cgroup_path.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v1/devices.rs b/src/cgroups/v1/devices.rs index 2599ed06a..1f71bfcda 100644 --- a/src/cgroups/v1/devices.rs +++ b/src/cgroups/v1/devices.rs @@ -1,18 +1,16 @@ -use std::{fs::create_dir_all, path::Path}; +use std::path::Path; use anyhow::Result; -use nix::unistd::Pid; -use crate::cgroups::common::{self, CGROUP_PROCS}; +use crate::cgroups::common; use crate::{cgroups::v1::Controller, rootfs::default_devices}; use oci_spec::{LinuxDeviceCgroup, LinuxDeviceType, LinuxResources}; pub struct Devices {} impl Controller for Devices { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Devices cgroup config"); - create_dir_all(&cgroup_root)?; for d in &linux_resources.devices { Self::apply_device(d, cgroup_root)?; @@ -27,7 +25,6 @@ impl Controller for Devices { Self::apply_device(&d, &cgroup_root)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v1/freezer.rs b/src/cgroups/v1/freezer.rs index 5ea995e93..27806ca0c 100644 --- a/src/cgroups/v1/freezer.rs +++ b/src/cgroups/v1/freezer.rs @@ -6,9 +6,8 @@ use std::{ }; use anyhow::{Result, *}; -use nix::unistd::Pid; -use crate::cgroups::common::{self, CGROUP_PROCS}; +use crate::cgroups::common; use crate::cgroups::v1::Controller; use oci_spec::{FreezerState, LinuxResources}; @@ -20,7 +19,7 @@ const 
FREEZER_STATE_FREEZING: &str = "FREEZING"; pub struct Freezer {} impl Controller for Freezer { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Freezer cgroup config"); create_dir_all(&cgroup_root)?; @@ -28,7 +27,6 @@ impl Controller for Freezer { Self::apply(freezer_state, cgroup_root)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } @@ -116,8 +114,10 @@ impl Freezer { #[cfg(test)] mod tests { use super::*; + use crate::cgroups::common::CGROUP_PROCS; use crate::cgroups::test::set_fixture; use crate::utils::create_temp_dir; + use nix::unistd::Pid; use oci_spec::FreezerState; #[test] @@ -160,10 +160,9 @@ mod tests { } #[test] - fn test_apply() { - let tmp = - create_temp_dir("test_set_freezer_state").expect("create temp directory for test"); - set_fixture(&tmp, CGROUP_FREEZER_STATE, "").expect("Set fixure for freezer state"); + fn test_add_and_apply() { + let tmp = create_temp_dir("test_add_task").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_FREEZER_STATE, "").expect("set fixure for freezer state"); set_fixture(&tmp, CGROUP_PROCS, "").expect("set fixture for proc file"); // set Thawed state. 
@@ -182,13 +181,13 @@ mod tests { }; let pid = Pid::from_raw(1000); - let _ = - ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + Freezer::add_task(pid, &tmp).expect("freezer add task"); + ::apply(&linux_resources, &tmp).expect("freezer apply"); let state_content = - std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("read to string"); assert_eq!(FREEZER_STATE_THAWED, state_content); let pid_content = - std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("read to string"); assert_eq!(pid_content, "1000"); } @@ -208,13 +207,13 @@ mod tests { }; let pid = Pid::from_raw(1001); - let _ = - ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + Freezer::add_task(pid, &tmp).expect("freezer add task"); + ::apply(&linux_resources, &tmp).expect("freezer apply"); let state_content = - std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("read to string"); assert_eq!(FREEZER_STATE_FROZEN, state_content); let pid_content = - std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("read to string"); assert_eq!(pid_content, "1001"); } @@ -233,16 +232,16 @@ mod tests { freezer: Some(FreezerState::Undefined), }; - let old_state_content = - std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); let pid = Pid::from_raw(1002); - let _ = - ::apply(&linux_resources, &tmp, pid).expect("freezer apply"); + let old_state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("read to string"); + Freezer::add_task(pid, &tmp).expect("freezer add task"); + ::apply(&linux_resources, &tmp).expect("freezer apply"); let state_content = - 
std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("read to string"); assert_eq!(old_state_content, state_content); let pid_content = - std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("Read to string"); + std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("read to string"); assert_eq!(pid_content, "1002"); } } diff --git a/src/cgroups/v1/hugetlb.rs b/src/cgroups/v1/hugetlb.rs index ad1a2604b..1ee473f79 100644 --- a/src/cgroups/v1/hugetlb.rs +++ b/src/cgroups/v1/hugetlb.rs @@ -1,36 +1,27 @@ -use std::{fs, path::Path}; +use std::path::Path; -use anyhow::bail; +use anyhow::{bail, Result}; use regex::Regex; -use crate::cgroups::{ - common::{self, CGROUP_PROCS}, - v1::Controller, -}; +use crate::cgroups::{common, v1::Controller}; use oci_spec::{LinuxHugepageLimit, LinuxResources}; pub struct Hugetlb {} impl Controller for Hugetlb { - fn apply( - linux_resources: &LinuxResources, - cgroup_root: &std::path::Path, - pid: nix::unistd::Pid, - ) -> anyhow::Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &std::path::Path) -> Result<()> { log::debug!("Apply Hugetlb cgroup config"); - fs::create_dir_all(cgroup_root)?; for hugetlb in &linux_resources.hugepage_limits { Self::apply(cgroup_root, hugetlb)? 
} - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } impl Hugetlb { - fn apply(root_path: &Path, hugetlb: &LinuxHugepageLimit) -> anyhow::Result<()> { + fn apply(root_path: &Path, hugetlb: &LinuxHugepageLimit) -> Result<()> { let re = Regex::new(r"(?P[0-9]+)[KMG]B")?; let caps = re.captures(&hugetlb.page_size); match caps { @@ -44,8 +35,8 @@ impl Hugetlb { } common::write_cgroup_file( - &root_path.join(format!("hugetlb.{}.limit_in_bytes", hugetlb.page_size)), - &hugetlb.limit, + root_path.join(format!("hugetlb.{}.limit_in_bytes", hugetlb.page_size)), + hugetlb.limit, )?; Ok(()) } diff --git a/src/cgroups/v1/manager.rs b/src/cgroups/v1/manager.rs index e7814d372..2398b97b3 100644 --- a/src/cgroups/v1/manager.rs +++ b/src/cgroups/v1/manager.rs @@ -7,6 +7,7 @@ use nix::unistd::Pid; use procfs::process::Process; +use super::ControllerType; use super::{ blkio::Blkio, controller_type::CONTROLLERS, cpu::Cpu, cpuacct::CpuAcct, cpuset::CpuSet, devices::Devices, freezer::Freezer, hugetlb::Hugetlb, memory::Memory, @@ -19,16 +20,16 @@ use crate::utils; use crate::{cgroups::common::CgroupManager, utils::PathBufExt}; use oci_spec::LinuxResources; pub struct Manager { - subsystems: HashMap, + subsystems: HashMap, } impl Manager { pub fn new(cgroup_path: PathBuf) -> Result { - let mut subsystems = HashMap::::new(); - for subsystem in CONTROLLERS.iter().map(|c| c.to_string()) { + let mut subsystems = HashMap::::new(); + for subsystem in CONTROLLERS { subsystems.insert( - subsystem.to_owned(), - Self::get_subsystem_path(&cgroup_path, &subsystem)?, + subsystem.clone(), + Self::get_subsystem_path(&cgroup_path, &subsystem.to_string())?, ); } @@ -58,21 +59,44 @@ impl Manager { } impl CgroupManager for Manager { - fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { + fn add_task(&self, pid: Pid) -> Result<()> { for subsys in &self.subsystems { - match subsys.0.as_str() { - "cpu" => Cpu::apply(linux_resources, &subsys.1, pid)?, - 
"cpuacct" => CpuAcct::apply(linux_resources, &subsys.1, pid)?, - "cpuset" => CpuSet::apply(linux_resources, &subsys.1, pid)?, - "devices" => Devices::apply(linux_resources, &subsys.1, pid)?, - "hugetlb" => Hugetlb::apply(linux_resources, &subsys.1, pid)?, - "memory" => Memory::apply(linux_resources, &subsys.1, pid)?, - "pids" => Pids::apply(linux_resources, &subsys.1, pid)?, - "blkio" => Blkio::apply(linux_resources, &subsys.1, pid)?, - "net_prio" => NetworkPriority::apply(linux_resources, &subsys.1, pid)?, - "net_cls" => NetworkClassifier::apply(linux_resources, &subsys.1, pid)?, - "freezer" => Freezer::apply(linux_resources, &subsys.1, pid)?, - _ => unreachable!("every subsystem should have an associated controller"), + match subsys.0 { + ControllerType::Cpu => Cpu::add_task(pid, subsys.1)?, + ControllerType::CpuAcct => CpuAcct::add_task(pid, subsys.1)?, + ControllerType::CpuSet => CpuSet::add_task(pid, subsys.1)?, + ControllerType::Devices => Devices::add_task(pid, subsys.1)?, + ControllerType::HugeTlb => Hugetlb::add_task(pid, subsys.1)?, + ControllerType::Memory => Memory::add_task(pid, subsys.1)?, + ControllerType::Pids => Pids::add_task(pid, subsys.1)?, + ControllerType::Blkio => Blkio::add_task(pid, subsys.1)?, + ControllerType::NetworkPriority => NetworkPriority::add_task(pid, subsys.1)?, + ControllerType::NetworkClassifier => NetworkClassifier::add_task(pid, subsys.1)?, + _ => continue, + } + } + + Ok(()) + } + + fn apply(&self, linux_resources: &LinuxResources) -> Result<()> { + for subsys in &self.subsystems { + match subsys.0 { + ControllerType::Cpu => Cpu::apply(linux_resources, &subsys.1)?, + ControllerType::CpuAcct => CpuAcct::apply(linux_resources, &subsys.1)?, + ControllerType::CpuSet => CpuSet::apply(linux_resources, &subsys.1)?, + ControllerType::Devices => Devices::apply(linux_resources, &subsys.1)?, + ControllerType::HugeTlb => Hugetlb::apply(linux_resources, &subsys.1)?, + ControllerType::Memory => Memory::apply(linux_resources, &subsys.1)?, 
+ ControllerType::Pids => Pids::apply(linux_resources, &subsys.1)?, + ControllerType::Blkio => Blkio::apply(linux_resources, &subsys.1)?, + ControllerType::NetworkPriority => { + NetworkPriority::apply(linux_resources, &subsys.1)? + } + ControllerType::NetworkClassifier => { + NetworkClassifier::apply(linux_resources, &subsys.1)? + } + ControllerType::Freezer => Freezer::apply(linux_resources, &subsys.1)?, } } diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index f00a7f450..aa09e58e7 100644 --- a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -1,13 +1,10 @@ use std::io::{prelude::*, Write}; -use std::{ - fs::{create_dir_all, OpenOptions}, - path::Path, -}; +use std::{fs::OpenOptions, path::Path}; use anyhow::{Result, *}; -use nix::{errno::Errno, unistd::Pid}; +use nix::errno::Errno; -use crate::cgroups::common::{self, CGROUP_PROCS}; +use crate::cgroups::common::{self}; use crate::cgroups::v1::Controller; use oci_spec::{LinuxMemory, LinuxResources}; @@ -25,9 +22,8 @@ const CGROUP_KERNEL_TCP_MEMORY_LIMIT: &str = "memory.kmem.tcp.limit_in_bytes"; pub struct Memory {} impl Controller for Memory { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Memory cgroup config"); - create_dir_all(&cgroup_root)?; if let Some(memory) = &linux_resources.memory { let reservation = memory.reservation.unwrap_or(0); @@ -76,7 +72,6 @@ impl Controller for Memory { } } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } @@ -239,6 +234,7 @@ impl Memory { #[cfg(test)] mod tests { use super::*; + use crate::cgroups::common::CGROUP_PROCS; use crate::cgroups::test::set_fixture; use crate::utils::create_temp_dir; use oci_spec::LinuxMemory; @@ -368,8 +364,7 @@ mod tests { freezer: None, }; - let pid = Pid::from_raw(pid_int); - let result = ::apply(&linux_resources, &tmp, pid); + let result = 
::apply(&linux_resources, &tmp); if result.is_err() { if let Some(swappiness) = memory_limits.swappiness { @@ -455,10 +450,6 @@ mod tests { } }; - // check procs file - let procs_content = std::fs::read_to_string(tmp.join(CGROUP_PROCS)).expect("read procs file"); - let procs_check = procs_content == pid.to_string(); - // useful for debugging println!("reservation_check: {:?}", reservation_check); println!("kernel_check: {:?}", kernel_check); @@ -467,7 +458,7 @@ mod tests { println!("limit_swap_check: {:?}", limit_swap_check); // combine all the checks - reservation_check && kernel_check && kernel_tcp_check && swappiness_check && limit_swap_check && procs_check + reservation_check && kernel_check && kernel_tcp_check && swappiness_check && limit_swap_check } } } diff --git a/src/cgroups/v1/network_classifier.rs b/src/cgroups/v1/network_classifier.rs index 88da25653..fed3ae2e7 100644 --- a/src/cgroups/v1/network_classifier.rs +++ b/src/cgroups/v1/network_classifier.rs @@ -1,25 +1,21 @@ -use std::{fs::create_dir_all, path::Path}; +use std::path::Path; use anyhow::Result; -use nix::unistd::Pid; use crate::cgroups::common; -use crate::cgroups::common::CGROUP_PROCS; use crate::cgroups::v1::Controller; use oci_spec::{LinuxNetwork, LinuxResources}; pub struct NetworkClassifier {} impl Controller for NetworkClassifier { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply NetworkClassifier cgroup config"); - create_dir_all(&cgroup_root)?; if let Some(network) = linux_resources.network.as_ref() { Self::apply(cgroup_root, network)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v1/network_priority.rs b/src/cgroups/v1/network_priority.rs index 05f6c3bf6..6a25a017c 100644 --- a/src/cgroups/v1/network_priority.rs +++ b/src/cgroups/v1/network_priority.rs @@ -1,25 +1,21 @@ -use 
std::{fs::create_dir_all, path::Path}; +use std::path::Path; use anyhow::Result; -use nix::unistd::Pid; use crate::cgroups::common; -use crate::cgroups::common::CGROUP_PROCS; use crate::cgroups::v1::Controller; use oci_spec::{LinuxNetwork, LinuxResources}; pub struct NetworkPriority {} impl Controller for NetworkPriority { - fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply NetworkPriority cgroup config"); - create_dir_all(&cgroup_root)?; if let Some(network) = linux_resources.network.as_ref() { Self::apply(cgroup_root, network)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index e41153db1..09f905cce 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -1,14 +1,8 @@ -use std::{ - fs::{self}, - path::Path, -}; +use std::path::Path; use anyhow::Result; -use crate::cgroups::{ - common::{self, CGROUP_PROCS}, - v1::Controller, -}; +use crate::cgroups::{common, v1::Controller}; use oci_spec::{LinuxPids, LinuxResources}; pub struct Pids {} @@ -17,16 +11,13 @@ impl Controller for Pids { fn apply( linux_resources: &LinuxResources, cgroup_root: &std::path::Path, - pid: nix::unistd::Pid, ) -> anyhow::Result<()> { log::debug!("Apply pids cgroup config"); - fs::create_dir_all(cgroup_root)?; if let Some(pids) = &linux_resources.pids { Self::apply(cgroup_root, pids)?; } - common::write_cgroup_file(cgroup_root.join(CGROUP_PROCS), pid)?; Ok(()) } } diff --git a/src/cgroups/v2/manager.rs b/src/cgroups/v2/manager.rs index 77a04c618..1da2aae4f 100644 --- a/src/cgroups/v2/manager.rs +++ b/src/cgroups/v2/manager.rs @@ -34,28 +34,31 @@ const CONTROLLER_TYPES: &[ControllerType] = &[ pub struct Manager { root_path: PathBuf, cgroup_path: PathBuf, + full_path: PathBuf, } impl Manager { pub fn new(root_path: PathBuf, cgroup_path: PathBuf) -> 
Result { + let full_path = root_path.join_absolute_path(&cgroup_path)?; + Ok(Self { root_path, cgroup_path, + full_path, }) } - fn create_unified_cgroup(&self, cgroup_path: &Path, pid: Pid) -> Result { - let full_path = self.root_path.join_absolute_path(cgroup_path)?; + fn create_unified_cgroup(&self, pid: Pid) -> Result<()> { let controllers: Vec = self - .get_available_controllers(&self.root_path)? - .into_iter() + .get_available_controllers()? + .iter() .map(|c| format!("{}{}", "+", c.to_string())) .collect(); Self::write_controllers(&self.root_path, &controllers)?; let mut current_path = self.root_path.clone(); - let mut components = cgroup_path.components().skip(1).peekable(); + let mut components = self.cgroup_path.components().skip(1).peekable(); while let Some(component) = components.next() { current_path = current_path.join(component); if !current_path.exists() { @@ -70,15 +73,12 @@ impl Manager { } } - common::write_cgroup_file(&full_path.join(CGROUP_PROCS), pid)?; - Ok(full_path) + common::write_cgroup_file(&self.full_path.join(CGROUP_PROCS), pid)?; + Ok(()) } - fn get_available_controllers>( - &self, - cgroup_path: P, - ) -> Result> { - let controllers_path = self.root_path.join(cgroup_path).join(CGROUP_CONTROLLERS); + fn get_available_controllers(&self) -> Result> { + let controllers_path = self.root_path.join(CGROUP_CONTROLLERS); if !controllers_path.exists() { bail!( "cannot get available controllers. 
{:?} does not exist", @@ -112,17 +112,20 @@ impl Manager { } impl CgroupManager for Manager { - fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { - let full_cgroup_path = self.create_unified_cgroup(&self.cgroup_path, pid)?; + fn add_task(&self, pid: Pid) -> Result<()> { + self.create_unified_cgroup(pid)?; + Ok(()) + } + fn apply(&self, linux_resources: &LinuxResources) -> Result<()> { for controller in CONTROLLER_TYPES { match controller { - ControllerType::Cpu => Cpu::apply(linux_resources, &full_cgroup_path)?, - ControllerType::CpuSet => CpuSet::apply(linux_resources, &full_cgroup_path)?, - ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &&full_cgroup_path)?, - ControllerType::Io => Io::apply(linux_resources, &&full_cgroup_path)?, - ControllerType::Memory => Memory::apply(linux_resources, &full_cgroup_path)?, - ControllerType::Pids => Pids::apply(linux_resources, &&full_cgroup_path)?, + ControllerType::Cpu => Cpu::apply(linux_resources, &self.full_path)?, + ControllerType::CpuSet => CpuSet::apply(linux_resources, &self.full_path)?, + ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &self.full_path)?, + ControllerType::Io => Io::apply(linux_resources, &self.full_path)?, + ControllerType::Memory => Memory::apply(linux_resources, &self.full_path)?, + ControllerType::Pids => Pids::apply(linux_resources, &self.full_path)?, } } @@ -130,9 +133,8 @@ impl CgroupManager for Manager { } fn remove(&self) -> Result<()> { - let full_path = self.root_path.join_absolute_path(&self.cgroup_path)?; - log::debug!("remove cgroup {:?}", full_path); - fs::remove_dir_all(full_path)?; + log::debug!("remove cgroup {:?}", self.full_path); + fs::remove_dir_all(&self.full_path)?; Ok(()) } diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs index a21b65bf6..726ed1a7e 100644 --- a/src/cgroups/v2/systemd_manager.rs +++ b/src/cgroups/v2/systemd_manager.rs @@ -30,7 +30,8 @@ const CONTROLLER_TYPES: &[ControllerType] = &[ 
/// SystemDCGroupManager is a driver for managing cgroups via systemd. pub struct SystemDCGroupManager { root_path: PathBuf, - cgroups_path: CgroupsPath, + cgroups_path: PathBuf, + full_path: PathBuf, } /// Represents the systemd cgroups path: @@ -45,6 +46,19 @@ struct CgroupsPath { impl SystemDCGroupManager { pub fn new(root_path: PathBuf, cgroups_path: PathBuf) -> Result { + // TODO: create the systemd unit using a dbus client. + let cgroups_path = Self::new_cgroups_path(cgroups_path)?; + let cgroups_path = Self::get_cgroups_path(cgroups_path)?; + let full_path = root_path.join_absolute_path(&cgroups_path)?; + + Ok(SystemDCGroupManager { + root_path, + cgroups_path, + full_path, + }) + } + + fn new_cgroups_path(cgroups_path: PathBuf) -> Result { // cgroups path may never be empty as it is defaulted to `/youki` // see 'get_cgroup_path' under utils.rs. // if cgroups_path was provided it should be of the form [slice]:[scope_prefix]:[name], @@ -69,35 +83,27 @@ impl SystemDCGroupManager { name = parts[2]; } - // TODO: create the systemd unit using a dbus client. - - Ok(SystemDCGroupManager { - root_path, - cgroups_path: CgroupsPath { - parent: parent.to_owned(), - scope: scope.to_owned(), - name: name.to_owned(), - }, + Ok(CgroupsPath { + parent: parent.to_owned(), + scope: scope.to_owned(), + name: name.to_owned(), }) } /// get_unit_name returns the unit (scope) name from the path provided by the user /// for example: foo:docker:bar returns in '/docker-bar.scope' - fn get_unit_name(&self) -> String { + fn get_unit_name(cgroups_path: CgroupsPath) -> String { // By default we create a scope unless specified explicitly. 
- if !self.cgroups_path.name.ends_with(".slice") { - return format!( - "{}-{}.scope", - self.cgroups_path.scope, self.cgroups_path.name - ); + if !cgroups_path.name.ends_with(".slice") { + return format!("{}-{}.scope", cgroups_path.scope, cgroups_path.name); } - self.cgroups_path.name.clone() + cgroups_path.name.clone() } // systemd represents slice hierarchy using `-`, so we need to follow suit when // generating the path of slice. For example, 'test-a-b.slice' becomes // '/test.slice/test-a.slice/test-a-b.slice'. - fn expand_slice(&self, slice: &str) -> Result { + fn expand_slice(slice: &str) -> Result { let suffix = ".slice"; if slice.len() <= suffix.len() || !slice.ends_with(suffix) { bail!("invalid slice name: {}", slice); @@ -125,15 +131,15 @@ impl SystemDCGroupManager { // get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath. // an example of the final path: "/machine.slice/docker-foo.scope" - fn get_cgroups_path(&self) -> Result { + fn get_cgroups_path(cgroups_path: CgroupsPath) -> Result { // the root slice is under 'machine.slice'. let mut slice = Path::new("/machine.slice").to_path_buf(); // if the user provided a '.slice' (as in a branch of a tree) // we need to "unpack it". - if !self.cgroups_path.parent.is_empty() { - slice = self.expand_slice(&self.cgroups_path.parent)?; + if !cgroups_path.parent.is_empty() { + slice = Self::expand_slice(&cgroups_path.parent)?; } - let unit_name = self.get_unit_name(); + let unit_name = Self::get_unit_name(cgroups_path); let cgroups_path = slice.join(unit_name); Ok(cgroups_path) } @@ -141,9 +147,7 @@ impl SystemDCGroupManager { /// create_unified_cgroup verifies sure that *each level* in the downward path from the root cgroup /// down to the cgroup_path provided by the user is a valid cgroup hierarchy, /// containing the attached controllers and that it contains the container pid. 
- fn create_unified_cgroup(&self, pid: Pid) -> Result { - let cgroups_path = self.get_cgroups_path()?; - let full_path = self.root_path.join_absolute_path(&cgroups_path)?; + fn create_unified_cgroup(&self, pid: Pid) -> Result<()> { let controllers: Vec = self .get_available_controllers(&self.root_path)? .into_iter() @@ -154,7 +158,7 @@ impl SystemDCGroupManager { Self::write_controllers(&self.root_path, &controllers)?; let mut current_path = self.root_path.clone(); - let mut components = cgroups_path.components().skip(1).peekable(); + let mut components = self.cgroups_path.components().skip(1).peekable(); // Verify that *each level* in the downward path from the root cgroup // down to the cgroup_path provided by the user is a valid cgroup hierarchy. // containing the attached controllers. @@ -172,8 +176,8 @@ impl SystemDCGroupManager { } } - common::write_cgroup_file(full_path.join(CGROUP_PROCS), &pid)?; - Ok(full_path) + common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), &pid)?; + Ok(()) } fn get_available_controllers>( @@ -212,21 +216,25 @@ impl SystemDCGroupManager { } impl CgroupManager for SystemDCGroupManager { - fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> { + fn add_task(&self, pid: Pid) -> Result<()> { // Dont attach any pid to the cgroup if -1 is specified as a pid if pid.as_raw() == -1 { return Ok(()); } - let full_cgroup_path = self.create_unified_cgroup(pid)?; + self.create_unified_cgroup(pid)?; + Ok(()) + } + + fn apply(&self, linux_resources: &LinuxResources) -> Result<()> { for controller in CONTROLLER_TYPES { match controller { - ControllerType::Cpu => Cpu::apply(linux_resources, &full_cgroup_path)?, - ControllerType::CpuSet => CpuSet::apply(linux_resources, &full_cgroup_path)?, - ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &&full_cgroup_path)?, - ControllerType::Io => Io::apply(linux_resources, &&full_cgroup_path)?, - ControllerType::Memory => Memory::apply(linux_resources, &full_cgroup_path)?, 
- ControllerType::Pids => Pids::apply(linux_resources, &&full_cgroup_path)?, + ControllerType::Cpu => Cpu::apply(linux_resources, &self.full_path)?, + ControllerType::CpuSet => CpuSet::apply(linux_resources, &self.full_path)?, + ControllerType::HugeTlb => HugeTlb::apply(linux_resources, &self.full_path)?, + ControllerType::Io => Io::apply(linux_resources, &self.full_path)?, + ControllerType::Memory => Memory::apply(linux_resources, &self.full_path)?, + ControllerType::Pids => Pids::apply(linux_resources, &self.full_path)?, } } @@ -244,13 +252,8 @@ mod tests { #[test] fn expand_slice_works() -> Result<()> { - let manager = SystemDCGroupManager::new( - PathBuf::from("/sys/fs/cgroup"), - PathBuf::from("test-a-b.slice:docker:foo"), - )?; - assert_eq!( - manager.expand_slice("test-a-b.slice")?, + SystemDCGroupManager::expand_slice("test-a-b.slice")?, PathBuf::from("/test.slice/test-a.slice/test-a-b.slice"), ); @@ -259,13 +262,12 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> { - let manager = SystemDCGroupManager::new( - PathBuf::from("/sys/fs/cgroup"), - PathBuf::from("test-a-b.slice:docker:foo"), - )?; + let cgroups_path = + SystemDCGroupManager::new_cgroups_path(PathBuf::from("test-a-b.slice:docker:foo")) + .expect(""); assert_eq!( - manager.get_cgroups_path()?, + SystemDCGroupManager::get_cgroups_path(cgroups_path)?, PathBuf::from("/test.slice/test-a.slice/test-a-b.slice/docker-foo.scope"), ); @@ -274,13 +276,12 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> { - let manager = SystemDCGroupManager::new( - PathBuf::from("/sys/fs/cgroup"), - PathBuf::from("machine.slice:libpod:foo"), - )?; + let cgroups_path = + SystemDCGroupManager::new_cgroups_path(PathBuf::from("machine.slice:libpod:foo")) + .expect(""); assert_eq!( - manager.get_cgroups_path()?, + SystemDCGroupManager::get_cgroups_path(cgroups_path)?, PathBuf::from("/machine.slice/libpod-foo.scope"), ); @@ -289,13 +290,11 @@ mod tests { 
#[test] fn get_cgroups_path_works_with_scope() -> Result<()> { - let manager = SystemDCGroupManager::new( - PathBuf::from("/sys/fs/cgroup"), - PathBuf::from(":docker:foo"), - )?; + let cgroups_path = + SystemDCGroupManager::new_cgroups_path(PathBuf::from(":docker:foo")).expect(""); assert_eq!( - manager.get_cgroups_path()?, + SystemDCGroupManager::get_cgroups_path(cgroups_path)?, PathBuf::from("/machine.slice/docker-foo.scope"), ); diff --git a/src/process/fork.rs b/src/process/fork.rs index 211d59915..4fe23ff76 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -69,7 +69,8 @@ pub fn fork_first>( let init_pid = parent.wait_for_child_ready(child)?; log::debug!("init pid is {:?}", init_pid); if rootless.is_none() && linux.resources.is_some() { - cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?; + cmanager.add_task(Pid::from_raw(init_pid))?; + cmanager.apply(&linux.resources.as_ref().unwrap())?; } // update status and pid of the container process From 8f872d74caaca6b48b2de84108b4d08175acd30c Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 26 Jun 2021 10:58:31 +0200 Subject: [PATCH 28/70] Shorten names --- src/cgroups/v1/manager.rs | 50 +++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/cgroups/v1/manager.rs b/src/cgroups/v1/manager.rs index 2398b97b3..a79773027 100644 --- a/src/cgroups/v1/manager.rs +++ b/src/cgroups/v1/manager.rs @@ -7,7 +7,7 @@ use nix::unistd::Pid; use procfs::process::Process; -use super::ControllerType; +use super::ControllerType as CtrlType; use super::{ blkio::Blkio, controller_type::CONTROLLERS, cpu::Cpu, cpuacct::CpuAcct, cpuset::CpuSet, devices::Devices, freezer::Freezer, hugetlb::Hugetlb, memory::Memory, @@ -20,12 +20,12 @@ use crate::utils; use crate::{cgroups::common::CgroupManager, utils::PathBufExt}; use oci_spec::LinuxResources; pub struct Manager { - subsystems: HashMap, + subsystems: HashMap, } 
impl Manager { pub fn new(cgroup_path: PathBuf) -> Result { - let mut subsystems = HashMap::::new(); + let mut subsystems = HashMap::::new(); for subsystem in CONTROLLERS { subsystems.insert( subsystem.clone(), @@ -62,16 +62,16 @@ impl CgroupManager for Manager { fn add_task(&self, pid: Pid) -> Result<()> { for subsys in &self.subsystems { match subsys.0 { - ControllerType::Cpu => Cpu::add_task(pid, subsys.1)?, - ControllerType::CpuAcct => CpuAcct::add_task(pid, subsys.1)?, - ControllerType::CpuSet => CpuSet::add_task(pid, subsys.1)?, - ControllerType::Devices => Devices::add_task(pid, subsys.1)?, - ControllerType::HugeTlb => Hugetlb::add_task(pid, subsys.1)?, - ControllerType::Memory => Memory::add_task(pid, subsys.1)?, - ControllerType::Pids => Pids::add_task(pid, subsys.1)?, - ControllerType::Blkio => Blkio::add_task(pid, subsys.1)?, - ControllerType::NetworkPriority => NetworkPriority::add_task(pid, subsys.1)?, - ControllerType::NetworkClassifier => NetworkClassifier::add_task(pid, subsys.1)?, + CtrlType::Cpu => Cpu::add_task(pid, subsys.1)?, + CtrlType::CpuAcct => CpuAcct::add_task(pid, subsys.1)?, + CtrlType::CpuSet => CpuSet::add_task(pid, subsys.1)?, + CtrlType::Devices => Devices::add_task(pid, subsys.1)?, + CtrlType::HugeTlb => Hugetlb::add_task(pid, subsys.1)?, + CtrlType::Memory => Memory::add_task(pid, subsys.1)?, + CtrlType::Pids => Pids::add_task(pid, subsys.1)?, + CtrlType::Blkio => Blkio::add_task(pid, subsys.1)?, + CtrlType::NetworkPriority => NetworkPriority::add_task(pid, subsys.1)?, + CtrlType::NetworkClassifier => NetworkClassifier::add_task(pid, subsys.1)?, _ => continue, } } @@ -82,21 +82,19 @@ impl CgroupManager for Manager { fn apply(&self, linux_resources: &LinuxResources) -> Result<()> { for subsys in &self.subsystems { match subsys.0 { - ControllerType::Cpu => Cpu::apply(linux_resources, &subsys.1)?, - ControllerType::CpuAcct => CpuAcct::apply(linux_resources, &subsys.1)?, - ControllerType::CpuSet => CpuSet::apply(linux_resources, 
&subsys.1)?, - ControllerType::Devices => Devices::apply(linux_resources, &subsys.1)?, - ControllerType::HugeTlb => Hugetlb::apply(linux_resources, &subsys.1)?, - ControllerType::Memory => Memory::apply(linux_resources, &subsys.1)?, - ControllerType::Pids => Pids::apply(linux_resources, &subsys.1)?, - ControllerType::Blkio => Blkio::apply(linux_resources, &subsys.1)?, - ControllerType::NetworkPriority => { - NetworkPriority::apply(linux_resources, &subsys.1)? - } - ControllerType::NetworkClassifier => { + CtrlType::Cpu => Cpu::apply(linux_resources, &subsys.1)?, + CtrlType::CpuAcct => CpuAcct::apply(linux_resources, &subsys.1)?, + CtrlType::CpuSet => CpuSet::apply(linux_resources, &subsys.1)?, + CtrlType::Devices => Devices::apply(linux_resources, &subsys.1)?, + CtrlType::HugeTlb => Hugetlb::apply(linux_resources, &subsys.1)?, + CtrlType::Memory => Memory::apply(linux_resources, &subsys.1)?, + CtrlType::Pids => Pids::apply(linux_resources, &subsys.1)?, + CtrlType::Blkio => Blkio::apply(linux_resources, &subsys.1)?, + CtrlType::NetworkPriority => NetworkPriority::apply(linux_resources, &subsys.1)?, + CtrlType::NetworkClassifier => { NetworkClassifier::apply(linux_resources, &subsys.1)? } - ControllerType::Freezer => Freezer::apply(linux_resources, &subsys.1)?, + CtrlType::Freezer => Freezer::apply(linux_resources, &subsys.1)?, } } From 80e9670c4da44da445183012991904b5fd0ea4ca Mon Sep 17 00:00:00 2001 From: Brett Kochendorfer Date: Sat, 26 Jun 2021 08:36:15 -0500 Subject: [PATCH 29/70] Update README.md Change `dnf` to `apt-get` for Debian based systems --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e484a120c..89dc3ba8a 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ For other platforms, please use the devcontainer that we prepared. 
### Debian, Ubuntu and related distributions ```sh -$ sudo dnf install \ +$ sudo apt-get install \ pkg-config \ libsystemd-dev \ libdbus-glib-1-dev From fcfff8bf4594e7aae48695c01c9fb7bdfdf5c527 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 26 Jun 2021 11:03:54 +0200 Subject: [PATCH 30/70] Address review comments - Add comments for functions - Use better naming in systemd cgroup manager --- src/cgroups/common.rs | 3 +++ src/cgroups/v1/manager.rs | 1 + src/cgroups/v2/manager.rs | 2 ++ src/cgroups/v2/systemd_manager.rs | 25 ++++++++++++------------- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index 84a6dc148..20fd04101 100644 --- a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -19,8 +19,11 @@ pub const CGROUP_PROCS: &str = "cgroup.procs"; pub const DEFAULT_CGROUP_ROOT: &str = "/sys/fs/cgroup"; pub trait CgroupManager { + /// Adds a task specified by its pid to the cgroup fn add_task(&self, pid: Pid) -> Result<()>; + /// Applies resource restrictions to the cgroup fn apply(&self, linux_resources: &LinuxResources) -> Result<()>; + /// Removes the cgroup fn remove(&self) -> Result<()>; } diff --git a/src/cgroups/v1/manager.rs b/src/cgroups/v1/manager.rs index a79773027..3266f3bf1 100644 --- a/src/cgroups/v1/manager.rs +++ b/src/cgroups/v1/manager.rs @@ -24,6 +24,7 @@ pub struct Manager { } impl Manager { + /// Constructs a new cgroup manager with cgroups_path being relative to the root of the subsystem pub fn new(cgroup_path: PathBuf) -> Result { let mut subsystems = HashMap::::new(); for subsystem in CONTROLLERS { diff --git a/src/cgroups/v2/manager.rs b/src/cgroups/v2/manager.rs index 1da2aae4f..5a605e294 100644 --- a/src/cgroups/v2/manager.rs +++ b/src/cgroups/v2/manager.rs @@ -38,6 +38,8 @@ pub struct Manager { } impl Manager { + /// Constructs a new cgroup manager with root path being the mount point + /// of a cgroup v2 fs and cgroup path being a 
relative path from the root pub fn new(root_path: PathBuf, cgroup_path: PathBuf) -> Result { let full_path = root_path.join_absolute_path(&cgroup_path)?; diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs index 726ed1a7e..375fc1a7e 100644 --- a/src/cgroups/v2/systemd_manager.rs +++ b/src/cgroups/v2/systemd_manager.rs @@ -47,8 +47,8 @@ struct CgroupsPath { impl SystemDCGroupManager { pub fn new(root_path: PathBuf, cgroups_path: PathBuf) -> Result { // TODO: create the systemd unit using a dbus client. - let cgroups_path = Self::new_cgroups_path(cgroups_path)?; - let cgroups_path = Self::get_cgroups_path(cgroups_path)?; + let destructured_path = Self::destructure_cgroups_path(cgroups_path)?; + let cgroups_path = Self::construct_cgroups_path(destructured_path)?; let full_path = root_path.join_absolute_path(&cgroups_path)?; Ok(SystemDCGroupManager { @@ -58,7 +58,7 @@ impl SystemDCGroupManager { }) } - fn new_cgroups_path(cgroups_path: PathBuf) -> Result { + fn destructure_cgroups_path(cgroups_path: PathBuf) -> Result { // cgroups path may never be empty as it is defaulted to `/youki` // see 'get_cgroup_path' under utils.rs. // if cgroups_path was provided it should be of the form [slice]:[scope_prefix]:[name], @@ -97,7 +97,7 @@ impl SystemDCGroupManager { if !cgroups_path.name.ends_with(".slice") { return format!("{}-{}.scope", cgroups_path.scope, cgroups_path.name); } - cgroups_path.name.clone() + cgroups_path.name } // systemd represents slice hierarchy using `-`, so we need to follow suit when @@ -131,7 +131,7 @@ impl SystemDCGroupManager { // get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath. // an example of the final path: "/machine.slice/docker-foo.scope" - fn get_cgroups_path(cgroups_path: CgroupsPath) -> Result { + fn construct_cgroups_path(cgroups_path: CgroupsPath) -> Result { // the root slice is under 'machine.slice'. 
let mut slice = Path::new("/machine.slice").to_path_buf(); // if the user provided a '.slice' (as in a branch of a tree) @@ -176,8 +176,7 @@ impl SystemDCGroupManager { } } - common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), &pid)?; - Ok(()) + common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), pid) } fn get_available_controllers>( @@ -263,11 +262,11 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> { let cgroups_path = - SystemDCGroupManager::new_cgroups_path(PathBuf::from("test-a-b.slice:docker:foo")) + SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("test-a-b.slice:docker:foo")) .expect(""); assert_eq!( - SystemDCGroupManager::get_cgroups_path(cgroups_path)?, + SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, PathBuf::from("/test.slice/test-a.slice/test-a-b.slice/docker-foo.scope"), ); @@ -277,11 +276,11 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> { let cgroups_path = - SystemDCGroupManager::new_cgroups_path(PathBuf::from("machine.slice:libpod:foo")) + SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("machine.slice:libpod:foo")) .expect(""); assert_eq!( - SystemDCGroupManager::get_cgroups_path(cgroups_path)?, + SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, PathBuf::from("/machine.slice/libpod-foo.scope"), ); @@ -291,10 +290,10 @@ mod tests { #[test] fn get_cgroups_path_works_with_scope() -> Result<()> { let cgroups_path = - SystemDCGroupManager::new_cgroups_path(PathBuf::from(":docker:foo")).expect(""); + SystemDCGroupManager::destructure_cgroups_path(PathBuf::from(":docker:foo")).expect(""); assert_eq!( - SystemDCGroupManager::get_cgroups_path(cgroups_path)?, + SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, PathBuf::from("/machine.slice/docker-foo.scope"), ); From 99342d1581a059170dffa26ce488f3fc3957ae1b Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 28 
Jun 2021 20:06:53 +0200 Subject: [PATCH 31/70] Require only cgroups that are needed to fullfill the resource restrictions --- src/cgroups/v1/blkio.rs | 12 +++++- src/cgroups/v1/controller.rs | 7 ++++ src/cgroups/v1/cpu.rs | 19 +++++++++- src/cgroups/v1/cpuacct.rs | 7 ++++ src/cgroups/v1/cpuset.rs | 16 +++++++- src/cgroups/v1/devices.rs | 7 ++++ src/cgroups/v1/freezer.rs | 20 +++++++--- src/cgroups/v1/hugetlb.rs | 16 +++++++- src/cgroups/v1/manager.rs | 55 ++++++++++++++++++++++++---- src/cgroups/v1/memory.rs | 12 +++++- src/cgroups/v1/network_classifier.rs | 12 +++++- src/cgroups/v1/network_priority.rs | 12 +++++- src/cgroups/v1/pids.rs | 16 ++++++-- src/cgroups/v1/util.rs | 11 +++--- 14 files changed, 192 insertions(+), 30 deletions(-) diff --git a/src/cgroups/v1/blkio.rs b/src/cgroups/v1/blkio.rs index d80dc36af..3480084c1 100644 --- a/src/cgroups/v1/blkio.rs +++ b/src/cgroups/v1/blkio.rs @@ -12,15 +12,25 @@ const CGROUP_BLKIO_THROTTLE_WRITE_IOPS: &str = "blkio.throttle.write_iops_device pub struct Blkio {} impl Controller for Blkio { + type Resource = LinuxBlockIo; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply blkio cgroup config"); - if let Some(blkio) = &linux_resources.block_io { + if let Some(blkio) = Self::needs_to_handle(linux_resources) { Self::apply(cgroup_root, blkio)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(blkio) = &linux_resources.block_io { + return Some(blkio); + } + + None + } } impl Blkio { diff --git a/src/cgroups/v1/controller.rs b/src/cgroups/v1/controller.rs index 408d357da..9aaa8fcae 100644 --- a/src/cgroups/v1/controller.rs +++ b/src/cgroups/v1/controller.rs @@ -8,11 +8,18 @@ use oci_spec::LinuxResources; use crate::cgroups::common::{self, CGROUP_PROCS}; pub trait Controller { + type Resource; + + /// Adds a new task specified by its pid to the cgroup fn add_task(pid: Pid, cgroup_path: &Path) -> Result<()> { 
fs::create_dir_all(cgroup_path)?; common::write_cgroup_file(cgroup_path.join(CGROUP_PROCS), pid)?; Ok(()) } + /// Applies resource restrictions to the cgroup fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()>; + + /// Checks if the controller needs to handle this request + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource>; } diff --git a/src/cgroups/v1/cpu.rs b/src/cgroups/v1/cpu.rs index 856d90a0b..006bc09cf 100644 --- a/src/cgroups/v1/cpu.rs +++ b/src/cgroups/v1/cpu.rs @@ -16,15 +16,32 @@ const CGROUP_CPU_RT_PERIOD: &str = "cpu.rt_period_us"; pub struct Cpu {} impl Controller for Cpu { + type Resource = LinuxCpu; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Cpu cgroup config"); - if let Some(cpu) = &linux_resources.cpu { + if let Some(cpu) = Self::needs_to_handle(linux_resources) { Self::apply(cgroup_root, cpu)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(cpu) = &linux_resources.cpu { + if cpu.shares.is_some() + || cpu.period.is_some() + || cpu.quota.is_some() + || cpu.realtime_period.is_some() + || cpu.realtime_runtime.is_some() + { + return Some(cpu); + } + } + + None + } } impl Cpu { diff --git a/src/cgroups/v1/cpuacct.rs b/src/cgroups/v1/cpuacct.rs index 889f905d0..2632847e2 100644 --- a/src/cgroups/v1/cpuacct.rs +++ b/src/cgroups/v1/cpuacct.rs @@ -8,9 +8,16 @@ use super::Controller; pub struct CpuAcct {} impl Controller for CpuAcct { + type Resource = (); + fn apply(_linux_resources: &LinuxResources, _cgroup_path: &Path) -> Result<()> { Ok(()) } + + // apply never needs to be called, for accounting only + fn needs_to_handle(_linux_resources: &LinuxResources) -> Option<&Self::Resource> { + None + } } #[cfg(test)] diff --git a/src/cgroups/v1/cpuset.rs b/src/cgroups/v1/cpuset.rs index e232120c3..b7d669311 100644 --- a/src/cgroups/v1/cpuset.rs +++ b/src/cgroups/v1/cpuset.rs @@ -15,6 
+15,8 @@ const CGROUP_CPUSET_MEMS: &str = "cpuset.mems"; pub struct CpuSet {} impl Controller for CpuSet { + type Resource = LinuxCpu; + fn add_task(pid: Pid, cgroup_path: &Path) -> Result<()> { fs::create_dir_all(cgroup_path)?; @@ -28,12 +30,22 @@ impl Controller for CpuSet { fn apply(linux_resources: &LinuxResources, cgroup_path: &Path) -> Result<()> { log::debug!("Apply CpuSet cgroup config"); - if let Some(cpuset) = &linux_resources.cpu { + if let Some(cpuset) = Self::needs_to_handle(linux_resources) { Self::apply(cgroup_path, cpuset)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(cpuset) = &linux_resources.cpu { + if cpuset.cpus.is_some() || cpuset.mems.is_some() { + return Some(cpuset); + } + } + + None + } } impl CpuSet { @@ -52,7 +64,7 @@ impl CpuSet { // if a task is moved into the cgroup and a value has not been set for cpus and mems // Errno 28 (no space left on device) will be returned. Therefore we set the value from the parent if required. 
fn ensure_not_empty(cgroup_path: &Path, interface_file: &str) -> Result<()> { - let mut current = util::get_subsystem_mount_points(&ControllerType::CpuSet.to_string())?; + let mut current = util::get_subsystem_mount_point(&ControllerType::CpuSet)?; let relative_cgroup_path = cgroup_path.strip_prefix(¤t)?; for component in relative_cgroup_path.components() { diff --git a/src/cgroups/v1/devices.rs b/src/cgroups/v1/devices.rs index 1f71bfcda..3e5f12705 100644 --- a/src/cgroups/v1/devices.rs +++ b/src/cgroups/v1/devices.rs @@ -9,6 +9,8 @@ use oci_spec::{LinuxDeviceCgroup, LinuxDeviceType, LinuxResources}; pub struct Devices {} impl Controller for Devices { + type Resource = (); + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Devices cgroup config"); @@ -27,6 +29,11 @@ impl Controller for Devices { Ok(()) } + + // always needs to be called due to default devices + fn needs_to_handle(_linux_resources: &LinuxResources) -> Option<&Self::Resource> { + Some(&()) + } } impl Devices { diff --git a/src/cgroups/v1/freezer.rs b/src/cgroups/v1/freezer.rs index 27806ca0c..4a4dd090c 100644 --- a/src/cgroups/v1/freezer.rs +++ b/src/cgroups/v1/freezer.rs @@ -19,20 +19,30 @@ const FREEZER_STATE_FREEZING: &str = "FREEZING"; pub struct Freezer {} impl Controller for Freezer { + type Resource = FreezerState; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Freezer cgroup config"); create_dir_all(&cgroup_root)?; - if let Some(freezer_state) = linux_resources.freezer { + if let Some(freezer_state) = Self::needs_to_handle(linux_resources) { Self::apply(freezer_state, cgroup_root)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(freezer_state) = &linux_resources.freezer { + return Some(freezer_state); + } + + None + } } impl Freezer { - fn apply(freezer_state: FreezerState, cgroup_root: &Path) -> Result<()> { + fn 
apply(freezer_state: &FreezerState, cgroup_root: &Path) -> Result<()> { match freezer_state { FreezerState::Undefined => {} FreezerState::Thawed => { @@ -129,7 +139,7 @@ mod tests { // set Frozen state. { let freezer_state = FreezerState::Frozen; - Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + Freezer::apply(&freezer_state, &tmp).expect("Set freezer state"); let state_content = std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); @@ -139,7 +149,7 @@ mod tests { // set Thawed state. { let freezer_state = FreezerState::Thawed; - Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + Freezer::apply(&freezer_state, &tmp).expect("Set freezer state"); let state_content = std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); @@ -151,7 +161,7 @@ mod tests { let old_state_content = std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); let freezer_state = FreezerState::Undefined; - Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + Freezer::apply(&freezer_state, &tmp).expect("Set freezer state"); let state_content = std::fs::read_to_string(tmp.join(CGROUP_FREEZER_STATE)).expect("Read to string"); diff --git a/src/cgroups/v1/hugetlb.rs b/src/cgroups/v1/hugetlb.rs index 1ee473f79..7b4b2b66a 100644 --- a/src/cgroups/v1/hugetlb.rs +++ b/src/cgroups/v1/hugetlb.rs @@ -9,15 +9,27 @@ use oci_spec::{LinuxHugepageLimit, LinuxResources}; pub struct Hugetlb {} impl Controller for Hugetlb { + type Resource = Vec; + fn apply(linux_resources: &LinuxResources, cgroup_root: &std::path::Path) -> Result<()> { log::debug!("Apply Hugetlb cgroup config"); - for hugetlb in &linux_resources.hugepage_limits { - Self::apply(cgroup_root, hugetlb)? + if let Some(hugepage_limits) = Self::needs_to_handle(linux_resources) { + for hugetlb in hugepage_limits { + Self::apply(cgroup_root, hugetlb)? 
+ } } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if !linux_resources.hugepage_limits.is_empty() { + return Some(&linux_resources.hugepage_limits); + } + + None + } } impl Hugetlb { diff --git a/src/cgroups/v1/manager.rs b/src/cgroups/v1/manager.rs index 3266f3bf1..b3aae533c 100644 --- a/src/cgroups/v1/manager.rs +++ b/src/cgroups/v1/manager.rs @@ -2,6 +2,7 @@ use std::fs; use std::path::Path; use std::{collections::HashMap, path::PathBuf}; +use anyhow::bail; use anyhow::Result; use nix::unistd::Pid; @@ -28,23 +29,24 @@ impl Manager { pub fn new(cgroup_path: PathBuf) -> Result { let mut subsystems = HashMap::::new(); for subsystem in CONTROLLERS { - subsystems.insert( - subsystem.clone(), - Self::get_subsystem_path(&cgroup_path, &subsystem.to_string())?, - ); + if let Ok(subsystem_path) = Self::get_subsystem_path(&cgroup_path, subsystem) { + subsystems.insert(subsystem.clone(), subsystem_path); + } else { + log::warn!("Cgroup {} not supported on this system", subsystem); + } } Ok(Manager { subsystems }) } - fn get_subsystem_path(cgroup_path: &Path, subsystem: &str) -> anyhow::Result { + fn get_subsystem_path(cgroup_path: &Path, subsystem: &CtrlType) -> Result { log::debug!("Get path for subsystem: {}", subsystem); - let mount_point = util::get_subsystem_mount_points(subsystem)?; + let mount_point = util::get_subsystem_mount_point(subsystem)?; let cgroup = Process::myself()? .cgroups()? 
.into_iter() - .find(|c| c.controllers.contains(&subsystem.to_owned())) + .find(|c| c.controllers.contains(&subsystem.to_string())) .unwrap(); let p = if cgroup_path.to_string_lossy().into_owned().is_empty() { @@ -57,6 +59,43 @@ impl Manager { Ok(p) } + + fn get_required_controllers( + &self, + linux_resources: &LinuxResources, + ) -> Result> { + let mut required_controllers = HashMap::new(); + + for controller in CONTROLLERS { + let required = match controller { + CtrlType::Cpu => Cpu::needs_to_handle(linux_resources).is_some(), + CtrlType::CpuAcct => CpuAcct::needs_to_handle(linux_resources).is_some(), + CtrlType::CpuSet => CpuSet::needs_to_handle(linux_resources).is_some(), + CtrlType::Devices => Devices::needs_to_handle(linux_resources).is_some(), + CtrlType::HugeTlb => Hugetlb::needs_to_handle(linux_resources).is_some(), + CtrlType::Memory => Memory::needs_to_handle(linux_resources).is_some(), + CtrlType::Pids => Pids::needs_to_handle(linux_resources).is_some(), + CtrlType::Blkio => Blkio::needs_to_handle(linux_resources).is_some(), + CtrlType::NetworkPriority => { + NetworkPriority::needs_to_handle(linux_resources).is_some() + } + CtrlType::NetworkClassifier => { + NetworkClassifier::needs_to_handle(linux_resources).is_some() + } + CtrlType::Freezer => Freezer::needs_to_handle(linux_resources).is_some(), + }; + + if required { + if let Some(subsystem_path) = self.subsystems.get(controller) { + required_controllers.insert(controller, subsystem_path); + } else { + bail!("Cgroup {} is required to fullfill the request, but is not supported by this system", controller); + } + } + } + + Ok(required_controllers) + } } impl CgroupManager for Manager { @@ -81,7 +120,7 @@ impl CgroupManager for Manager { } fn apply(&self, linux_resources: &LinuxResources) -> Result<()> { - for subsys in &self.subsystems { + for subsys in self.get_required_controllers(linux_resources)? 
{ match subsys.0 { CtrlType::Cpu => Cpu::apply(linux_resources, &subsys.1)?, CtrlType::CpuAcct => CpuAcct::apply(linux_resources, &subsys.1)?, diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index aa09e58e7..760f1d565 100644 --- a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -22,10 +22,12 @@ const CGROUP_KERNEL_TCP_MEMORY_LIMIT: &str = "memory.kmem.tcp.limit_in_bytes"; pub struct Memory {} impl Controller for Memory { + type Resource = LinuxMemory; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply Memory cgroup config"); - if let Some(memory) = &linux_resources.memory { + if let Some(memory) = Self::needs_to_handle(linux_resources) { let reservation = memory.reservation.unwrap_or(0); Self::apply(&memory, cgroup_root)?; @@ -74,6 +76,14 @@ impl Controller for Memory { Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(memory) = &linux_resources.memory { + return Some(memory); + } + + None + } } impl Memory { diff --git a/src/cgroups/v1/network_classifier.rs b/src/cgroups/v1/network_classifier.rs index fed3ae2e7..551fc6726 100644 --- a/src/cgroups/v1/network_classifier.rs +++ b/src/cgroups/v1/network_classifier.rs @@ -9,15 +9,25 @@ use oci_spec::{LinuxNetwork, LinuxResources}; pub struct NetworkClassifier {} impl Controller for NetworkClassifier { + type Resource = LinuxNetwork; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply NetworkClassifier cgroup config"); - if let Some(network) = linux_resources.network.as_ref() { + if let Some(network) = Self::needs_to_handle(linux_resources) { Self::apply(cgroup_root, network)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(network) = &linux_resources.network { + return Some(network); + } + + None + } } impl NetworkClassifier { diff --git a/src/cgroups/v1/network_priority.rs 
b/src/cgroups/v1/network_priority.rs index 6a25a017c..63683bc3c 100644 --- a/src/cgroups/v1/network_priority.rs +++ b/src/cgroups/v1/network_priority.rs @@ -9,15 +9,25 @@ use oci_spec::{LinuxNetwork, LinuxResources}; pub struct NetworkPriority {} impl Controller for NetworkPriority { + type Resource = LinuxNetwork; + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply NetworkPriority cgroup config"); - if let Some(network) = linux_resources.network.as_ref() { + if let Some(network) = Self::needs_to_handle(linux_resources) { Self::apply(cgroup_root, network)?; } Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(network) = &linux_resources.network { + return Some(network); + } + + None + } } impl NetworkPriority { diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index 09f905cce..f09db1b45 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path}; use anyhow::Result; @@ -8,10 +8,12 @@ use oci_spec::{LinuxPids, LinuxResources}; pub struct Pids {} impl Controller for Pids { + type Resource = LinuxPids; + fn apply( linux_resources: &LinuxResources, - cgroup_root: &std::path::Path, - ) -> anyhow::Result<()> { + cgroup_root: &Path, + ) -> Result<()> { log::debug!("Apply pids cgroup config"); if let Some(pids) = &linux_resources.pids { @@ -20,6 +22,14 @@ impl Controller for Pids { Ok(()) } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Self::Resource> { + if let Some(pids) = &linux_resources.pids { + return Some(pids); + } + + None + } } impl Pids { diff --git a/src/cgroups/v1/util.rs b/src/cgroups/v1/util.rs index 7a31e28bf..dd070d6b2 100644 --- a/src/cgroups/v1/util.rs +++ b/src/cgroups/v1/util.rs @@ -3,21 +3,22 @@ use std::{collections::HashMap, path::PathBuf}; use anyhow::{anyhow, Result}; use procfs::process::Process; -use super::controller_type::CONTROLLERS; +use 
super::{ControllerType, controller_type::CONTROLLERS}; -pub fn list_subsystem_mount_points() -> Result> { +pub fn list_subsystem_mount_points() -> Result> { let mut mount_paths = HashMap::with_capacity(CONTROLLERS.len()); for controller in CONTROLLERS { - if let Ok(mount_point) = get_subsystem_mount_points(&controller.to_string()) { - mount_paths.insert(controller.to_string(), mount_point); + if let Ok(mount_point) = get_subsystem_mount_point(controller) { + mount_paths.insert(controller.to_owned(), mount_point); } } Ok(mount_paths) } -pub fn get_subsystem_mount_points(subsystem: &str) -> Result { +pub fn get_subsystem_mount_point(subsystem: &ControllerType) -> Result { + let subsystem = subsystem.to_string(); Process::myself()? .mountinfo()? .into_iter() From 6b1bfa40c9c015a2f8f62f27d4331f5c8914e788 Mon Sep 17 00:00:00 2001 From: Rongxiang Song Date: Tue, 29 Jun 2021 15:42:23 +0800 Subject: [PATCH 32/70] force delete container if it is running or created --- src/delete.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/delete.rs b/src/delete.rs index 7b535bace..30cf2f54a 100644 --- a/src/delete.rs +++ b/src/delete.rs @@ -6,13 +6,17 @@ use anyhow::{bail, Result}; use clap::Clap; use crate::cgroups; -use crate::container::Container; use crate::utils; +use crate::{ + container::{Container, ContainerStatus}, + signal, +}; +use nix::sys::signal as nix_signal; #[derive(Clap, Debug)] pub struct Delete { container_id: String, - // forces deletion of the container. + /// forces deletion of the container if it is still running (using SIGKILL) #[clap(short, long)] force: bool, } @@ -29,7 +33,15 @@ impl Delete { // load container state from json file, and check status of the container // it might be possible that delete is invoked on a running container. 
log::debug!("load the container from {:?}", container_root); - let container = Container::load(container_root)?.refresh_status()?; + let mut container = Container::load(container_root)?.refresh_status()?; + if container.can_kill() && self.force { + let sig = signal::from_str("SIGKILL")?; + log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); + nix_signal::kill(container.pid().unwrap(), sig)?; + container = container.update_status(ContainerStatus::Stopped); + container.save()?; + } + log::debug!("container status: {:?}", container.status()); if container.can_delete() { if container.root.exists() { nix::unistd::chdir(&PathBuf::from(&container.state.bundle))?; From d71833f862aa2778c6da4139207981f7bf6da7d9 Mon Sep 17 00:00:00 2001 From: Rongxiang Song Date: Wed, 30 Jun 2021 14:20:41 +0800 Subject: [PATCH 33/70] add comments in intergration_test.sh about test case that runc no paas --- integration_test.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration_test.sh b/integration_test.sh index ef16abe82..dac099cdf 100755 --- a/integration_test.sh +++ b/integration_test.sh @@ -9,6 +9,8 @@ test_cases=("default/default.t" "linux_cgroups_devices/linux_cgroups_devices.t" "linux_cgroups_relative_devices/linux_cgroups_relative_devices.t" "linux_cgroups_relative_hugetlb/linux_cgroups_relative_hugetlb.t" "linux_cgroups_relative_memory/linux_cgroups_relative_memory.t" "linux_cgroups_relative_network/linux_cgroups_relative_network.t" "linux_cgroups_relative_pids/linux_cgroups_relative_pids.t" "create/create.t" "kill/kill.t" "delete/delete.t" "state/state.t") +# Record the tests that runc also fails to pass below, maybe we will fix this by origin integration test, issue: https://github.com/containers/youki/issues/56 +# no_paas_test_case=("start/start.t") for case in "${test_cases[@]}"; do echo "Running $case" if [ 0 -ne $(sudo RUST_BACKTRACE=1 YOUKI_LOG_LEVEL=debug RUNTIME=$root/youki 
$root/integration_test/src/github.com/opencontainers/runtime-tools/validation/$case | grep "not ok" | wc -l) ]; then From 61758bd98459f8d1c32d10196645cde0a0f76371 Mon Sep 17 00:00:00 2001 From: utam0k Date: Wed, 30 Jun 2021 22:58:48 +0900 Subject: [PATCH 34/70] remove unnecessary clone() in create.rs --- src/create.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/create.rs b/src/create.rs index bf732880d..6be323146 100644 --- a/src/create.rs +++ b/src/create.rs @@ -120,6 +120,7 @@ impl Create { Ok(()) } } + /// Fork the process and actually start the container process fn run_container>( pid_file: Option

, @@ -186,15 +187,15 @@ fn run_container>( Process::Child(_child) => unreachable!(), // This is actually the child process after fork Process::Init(mut init) => { - // setup args and env vars as in the spec - let spec_args: &Vec = &spec.process.args.clone(); - let envs: &Vec = &spec.process.env.clone(); // prepare process - init_process(spec, command, rootfs, namespaces)?; + setup_init_process(&spec, command, rootfs, &namespaces)?; init.ready()?; notify_socket.wait_for_container_start()?; // actually run the command / program to be run in container - utils::do_exec(&spec_args[0], spec_args, envs)?; + let args: &Vec = &spec.process.args; + let envs: &Vec = &spec.process.env; + utils::do_exec(&args[0], args, envs)?; + // the command / program is done executing container .refresh_state()? @@ -211,16 +212,16 @@ fn run_container>( } /// setup hostname, rootfs for the container process -fn init_process( - spec: oci_spec::Spec, +fn setup_init_process( + spec: &oci_spec::Spec, command: impl Command, rootfs: PathBuf, - namespaces: Namespaces, + namespaces: &Namespaces, ) -> Result<()> { - let proc = spec.process.clone(); + let proc = &spec.process; - command.set_hostname(&spec.hostname.as_str())?; - if spec.process.no_new_privileges { + command.set_hostname(spec.hostname.as_str())?; + if proc.no_new_privileges { let _ = prctl::set_no_new_privileges(true); } From 3d509e00ff3009694b9627112d1fb5ffa164fe2b Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 1 Jul 2021 22:21:29 +0200 Subject: [PATCH 35/70] Allow wider range of arguments for spec loading --- oci_spec/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 2836fab83..7de141707 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -1,7 +1,7 @@ use nix::sys::stat::SFlag; use std::collections::HashMap; use std::fs::File; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::{bail, 
Result}; use serde::{Deserialize, Serialize}; @@ -607,7 +607,7 @@ pub struct Spec { } impl Spec { - pub fn load(path: &str) -> Result { + pub fn load>(path: P) -> Result { let file = File::open(path)?; let mut spec: Spec = serde_json::from_reader(&file)?; // FIME: It is fail if the caller isn't in the correct directory. From 8a13f82cd5ce35d6261ddfd57b05ef951be33b44 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 1 Jul 2021 22:24:06 +0200 Subject: [PATCH 36/70] Rename command --- src/capabilities.rs | 6 +++--- src/command/linux.rs | 4 ++-- src/command/mod.rs | 4 ++-- src/command/{command.rs => syscall.rs} | 4 ++-- src/command/test.rs | 4 ++-- src/container/container.rs | 4 ++-- src/namespaces.rs | 6 +++--- 7 files changed, 16 insertions(+), 16 deletions(-) rename src/command/{command.rs => syscall.rs} (94%) diff --git a/src/capabilities.rs b/src/capabilities.rs index cc35aacfe..273b937cb 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -1,4 +1,4 @@ -use crate::command::Command; +use crate::command::Syscall; use caps::*; use anyhow::Result; @@ -12,13 +12,13 @@ fn to_set(caps: &[LinuxCapabilityType]) -> CapsHashSet { capabilities } -pub fn reset_effective(command: &impl Command) -> Result<()> { +pub fn reset_effective(command: &impl Syscall) -> Result<()> { log::debug!("reset all caps"); command.set_capability(CapSet::Effective, &caps::all())?; Ok(()) } -pub fn drop_privileges(cs: &LinuxCapabilities, command: &impl Command) -> Result<()> { +pub fn drop_privileges(cs: &LinuxCapabilities, command: &impl Syscall) -> Result<()> { let all = caps::all(); log::debug!("dropping bounding capabilities to {:?}", cs.bounding); for c in all.difference(&to_set(&cs.bounding)) { diff --git a/src/command/linux.rs b/src/command/linux.rs index f835be246..f34016b50 100644 --- a/src/command/linux.rs +++ b/src/command/linux.rs @@ -24,7 +24,7 @@ use nix::{sched::unshare, sys::stat::Mode}; use oci_spec::LinuxRlimit; -use 
super::Command; +use super::Syscall; use crate::capabilities; /// Empty structure to implement Command trait for @@ -46,7 +46,7 @@ impl LinuxCommand { } } -impl Command for LinuxCommand { +impl Syscall for LinuxCommand { /// To enable dynamic typing, /// see https://doc.rust-lang.org/std/any/index.html for more information fn as_any(&self) -> &dyn Any { diff --git a/src/command/mod.rs b/src/command/mod.rs index 08e0ee64d..a8bcd927f 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -3,8 +3,8 @@ //! to call syscalls required for container management #[allow(clippy::module_inception)] -pub mod command; +pub mod syscall; pub mod linux; pub mod test; -pub use command::Command; +pub use syscall::Syscall; diff --git a/src/command/command.rs b/src/command/syscall.rs similarity index 94% rename from src/command/command.rs rename to src/command/syscall.rs index 37fd66655..2ba95eae0 100644 --- a/src/command/command.rs +++ b/src/command/syscall.rs @@ -16,7 +16,7 @@ use crate::command::{linux::LinuxCommand, test::TestHelperCommand}; /// This specifies various kernel/other functionalities required for /// container management -pub trait Command { +pub trait Syscall { fn as_any(&self) -> &dyn Any; fn pivot_rootfs(&self, path: &Path) -> Result<()>; fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()>; @@ -28,7 +28,7 @@ pub trait Command { fn get_pwuid(&self, uid: u32) -> Option>; } -pub fn create_command() -> Box { +pub fn create_syscall() -> Box { if cfg!(test) { Box::new(TestHelperCommand::default()) } else { diff --git a/src/command/test.rs b/src/command/test.rs index a80e71649..01f1ac7dd 100644 --- a/src/command/test.rs +++ b/src/command/test.rs @@ -4,7 +4,7 @@ use caps::{errors::CapsError, CapSet, CapsHashSet}; use nix::sched::CloneFlags; use oci_spec::LinuxRlimit; -use super::Command; +use super::Syscall; #[derive(Clone)] pub struct TestHelperCommand { @@ -23,7 +23,7 @@ impl Default for TestHelperCommand { } } -impl Command for TestHelperCommand { +impl 
Syscall for TestHelperCommand { fn as_any(&self) -> &dyn Any { self } diff --git a/src/container/container.rs b/src/container/container.rs index b05017f1c..d144e9497 100644 --- a/src/container/container.rs +++ b/src/container/container.rs @@ -9,7 +9,7 @@ use nix::unistd::Pid; use chrono::Utc; use procfs::process::Process; -use crate::command::command::create_command; +use crate::command::syscall::create_syscall; use crate::container::{ContainerStatus, State}; @@ -119,7 +119,7 @@ impl Container { pub fn creator(&self) -> Option { if let Some(uid) = self.state.creator { - let command = create_command(); + let command = create_syscall(); let user_name = command.get_pwuid(uid); if let Some(user_name) = user_name { return Some((&*user_name).to_owned()); diff --git a/src/namespaces.rs b/src/namespaces.rs index 6389dec9f..b6e81325e 100644 --- a/src/namespaces.rs +++ b/src/namespaces.rs @@ -15,12 +15,12 @@ use nix::{ unistd::{self, Gid, Uid}, }; -use crate::command::{command::create_command, Command}; +use crate::command::{syscall::create_syscall, Syscall}; use oci_spec::LinuxNamespace; pub struct Namespaces { spaces: Vec, - command: Box, + command: Box, pub clone_flags: CloneFlags, } @@ -33,7 +33,7 @@ impl From> for Namespaces { cf }, ); - let command: Box = create_command(); + let command: Box = create_syscall(); Namespaces { spaces: namespaces, From 9e7b13c7c253f3c3c13574a799f8e5b353a09d99 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Thu, 1 Jul 2021 22:25:23 +0200 Subject: [PATCH 37/70] Provide context in case of errors during dir creation --- src/utils.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils.rs b/src/utils.rs index 1711de195..d5f0729fb 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -96,6 +96,11 @@ pub fn write_file, C: AsRef<[u8]>>(path: P, contents: C) -> Resul Ok(()) } +pub fn create_dir_all>(path: P) -> Result<()> { + let path = path.as_ref(); + fs::create_dir_all(path).with_context(|| 
format!("failed to create directory {:?}", path)) +} + pub struct TempDir { path: Option, } From a46faff748bf1f69dca7276b2a0fc7cdaba1551f Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 3 Jul 2021 00:39:23 +0200 Subject: [PATCH 38/70] Ensure file info is captured --- oci_spec/src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 7de141707..b3a3324af 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::fs::File; use std::path::{Path, PathBuf}; -use anyhow::{bail, Result}; +use anyhow::{Context, Result, bail}; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone)] @@ -608,10 +608,12 @@ pub struct Spec { impl Spec { pub fn load>(path: P) -> Result { - let file = File::open(path)?; + let path = path.as_ref(); + let file = File::open(path).with_context(|| format!("failed to open {:?}", path))?; let mut spec: Spec = serde_json::from_reader(&file)?; // FIME: It is fail if the caller isn't in the correct directory. 
- spec.root.path = std::fs::canonicalize(spec.root.path)?; + spec.root.path = std::fs::canonicalize(&spec.root.path) + .with_context(|| format!("failed to canonicalize {:?}", spec.root.path))?; Ok(spec) } } From a387a43ffc3b80a9de3f54ed2ff28044d555cb94 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sat, 3 Jul 2021 00:45:38 +0200 Subject: [PATCH 39/70] Modularize create code --- src/capabilities.rs | 16 +- src/container/builder.rs | 281 ++++++++++++++++++++++++++++++++++ src/container/builder_impl.rs | 107 +++++++++++++ src/container/mod.rs | 2 + src/create.rs | 200 ++---------------------- src/process/fork.rs | 19 ++- src/process/init.rs | 38 ++++- src/process/mod.rs | 2 +- src/tty.rs | 4 +- 9 files changed, 464 insertions(+), 205 deletions(-) create mode 100644 src/container/builder.rs create mode 100644 src/container/builder_impl.rs diff --git a/src/capabilities.rs b/src/capabilities.rs index 273b937cb..773f6d578 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -1,4 +1,4 @@ -use crate::command::Syscall; +use crate::command::{Syscall}; use caps::*; use anyhow::Result; @@ -12,13 +12,13 @@ fn to_set(caps: &[LinuxCapabilityType]) -> CapsHashSet { capabilities } -pub fn reset_effective(command: &impl Syscall) -> Result<()> { +pub fn reset_effective(syscall: &impl Syscall) -> Result<()> { log::debug!("reset all caps"); - command.set_capability(CapSet::Effective, &caps::all())?; + syscall.set_capability(CapSet::Effective, &caps::all())?; Ok(()) } -pub fn drop_privileges(cs: &LinuxCapabilities, command: &impl Syscall) -> Result<()> { +pub fn drop_privileges(cs: &LinuxCapabilities, syscall: &impl Syscall) -> Result<()> { let all = caps::all(); log::debug!("dropping bounding capabilities to {:?}", cs.bounding); for c in all.difference(&to_set(&cs.bounding)) { @@ -31,11 +31,11 @@ pub fn drop_privileges(cs: &LinuxCapabilities, command: &impl Syscall) -> Result } } - command.set_capability(CapSet::Effective, 
&to_set(&cs.effective))?; - command.set_capability(CapSet::Permitted, &to_set(&cs.permitted))?; - command.set_capability(CapSet::Inheritable, &to_set(&cs.inheritable))?; + syscall.set_capability(CapSet::Effective, &to_set(&cs.effective))?; + syscall.set_capability(CapSet::Permitted, &to_set(&cs.permitted))?; + syscall.set_capability(CapSet::Inheritable, &to_set(&cs.inheritable))?; - if let Err(e) = command.set_capability(CapSet::Ambient, &to_set(&cs.ambient)) { + if let Err(e) = syscall.set_capability(CapSet::Ambient, &to_set(&cs.ambient)) { log::error!("failed to set ambient capabilities: {}", e); } Ok(()) diff --git a/src/container/builder.rs b/src/container/builder.rs new file mode 100644 index 000000000..5e5b66319 --- /dev/null +++ b/src/container/builder.rs @@ -0,0 +1,281 @@ +#![allow(unused_imports, unused_variables)] + +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use anyhow::{bail, Result}; +use nix::unistd; +use oci_spec::Spec; + +use crate::{command::{Syscall, linux::{self, LinuxCommand}, syscall::create_syscall}, notify_socket::NotifyListener, rootless::{self, lookup_map_binaries, should_use_rootless, Rootless}, tty, utils}; + +use super::{builder_impl::ContainerBuilderImpl, Container, ContainerStatus}; + +pub struct ContainerBuilder { + // defaults + /// + init: bool, + /// + use_systemd: bool, + /// + syscall: LinuxCommand, + //// + root_path: PathBuf, + + // required + /// + container_id: String, + /// + bundle: Option, + + // optional + /// + pid_file: Option, + /// + console_socket: Option, +} + +impl ContainerBuilder { + pub fn new_init>(container_id: String, bundle: P) -> Result { + let bundle = Some(fs::canonicalize(bundle.into())?); + let root_path = PathBuf::from("/run/youki"); + + Ok(Self { + init: true, + use_systemd: true, + syscall: LinuxCommand, + root_path, + container_id, + bundle, + pid_file: None, + console_socket: None, + }) + } + + pub fn new_tenant(container_id: String) -> Self { + let root_path = 
PathBuf::from("/run/youki"); + + Self { + init: false, + use_systemd: true, + syscall: LinuxCommand, + root_path, + container_id, + bundle: None, + pid_file: None, + console_socket: None, + } + } + + pub fn with_systemd(mut self, should_use: bool) -> Self { + self.use_systemd = should_use; + self + } + + pub fn with_root_path>(mut self, path: P) -> Self { + self.root_path = path.into(); + self + } + + pub fn with_pid_file>(mut self, path: P) -> Self { + self.pid_file = Some(path.into()); + self + } + + pub fn with_console_socket>(mut self, path: P) -> Self { + self.console_socket = Some(path.into()); + self + } + + pub fn with_env(mut self, env: HashMap) -> Self { + todo!(); + } + + pub fn with_cwd>(mut self, path: P) -> Self { + todo!(); + } + + pub fn with_container_command(mut self, command: Vec) -> Self { + todo!(); + } + + pub fn build(mut self) -> Result<()> { + let container_dir = self.prepare_container_dir()?; + let spec = self.load_and_safeguard_spec(&container_dir)?; + unistd::chdir(&*container_dir)?; + + let container = if self.init { + Some(self.create_container_state(&container_dir)?) + } else { + None + }; + + let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; + // convert path of root file system of the container to absolute path + let rootfs = fs::canonicalize(&spec.root.path)?; + + // if socket file path is given in commandline options, + // get file descriptors of console socket + let csocketfd = if let Some(console_socket) = &self.console_socket { + Some(tty::setup_console_socket(&container_dir, console_socket)?) 
+ } else { + None + }; + + let rootless = self.is_rootless_required(&spec)?; + + let mut builder_impl = ContainerBuilderImpl { + init: self.init, + use_systemd: self.use_systemd, + root_path: self.root_path, + container_id: self.container_id, + pid_file: self.pid_file, + syscall: self.syscall, + console_socket: csocketfd, + rootless, + container_dir, + spec, + rootfs, + notify_socket, + container, + }; + + builder_impl.create()?; + Ok(()) + } + + fn prepare_container_dir(&mut self) -> Result { + let container_dir = self.root_path.join(&self.container_id); + log::debug!("container directory will be {:?}", container_dir); + + match (self.init, container_dir.exists()) { + (true, true) => bail!("container {} already exists", self.container_id), + (true, false) => utils::create_dir_all(&container_dir)?, + (false, true) => {} + (false, false) => bail!("container {} does not exist", self.container_id), + } + + Ok(container_dir) + } + + fn load_and_safeguard_spec(&self, container_dir: &Path) -> Result { + let spec_path = if self.init { + let config_path = self.bundle.as_ref().unwrap().join("config.json"); + fs::copy(&config_path, container_dir.join("config.json"))?; + config_path + } else { + container_dir.join("config.json") + }; + + let spec = oci_spec::Spec::load(spec_path)?; + Ok(spec) + } + + fn is_rootless_required(&self, spec: &Spec) -> Result> { + let linux = spec.linux.as_ref().unwrap(); + + let rootless = if should_use_rootless() { + log::debug!("rootless container should be created"); + log::warn!( + "resource constraints and multi id mapping is unimplemented for rootless containers" + ); + rootless::validate(spec)?; + let mut rootless = Rootless::from(linux); + if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? 
{ + rootless.newuidmap = Some(uid_binary); + rootless.newgidmap = Some(gid_binary); + } + Some(rootless) + } else { + None + }; + + Ok(rootless) + } + + fn create_container_state(&self, container_dir: &Path) -> Result { + let container = Container::new( + &self.container_id, + ContainerStatus::Creating, + None, + self.bundle.as_ref().unwrap().to_str().unwrap(), + &container_dir, + )?; + container.save()?; + Ok(container) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // required values (must be specified in new...) + // - create + // - id + // - bundle + // - exec + // - id + // + // use with_... methods to specify + // optional values + // - console-socket + // - pid-file + // + // overwritable values + // - systemd (default true) + // - root_path (default /run/youki) + // + // overwritable values (for exec only?) + // - env + // - cwd + // - container command + // + // calculated in build() + // computed values + // - rootless + // - container_dir + // - spec + // - notify_socket + // - container + + // create + fn test_create_init() -> Result<()> { + let id = "".to_owned(); + let bundle = PathBuf::from(""); + let pid_file = PathBuf::from(""); + let console_socket = PathBuf::from(""); + let root_path = PathBuf::from(""); + + let container = ContainerBuilder::new_init(id, bundle)? 
+ .with_pid_file(pid_file) // optional + .with_console_socket(console_socket) //optional + .with_systemd(false) // overwrite default + .with_root_path(root_path) // overwrite default + .build()?; + + Ok(()) + } + + // exec + fn test_create_tenant() -> Result<()> { + let id = "".to_owned(); + let pid_file = PathBuf::from(""); + let console_socket = PathBuf::from(""); + let cwd = PathBuf::from(""); + let env = HashMap::new(); + + let container = ContainerBuilder::new_tenant(id) + .with_pid_file(pid_file) + .with_console_socket(console_socket) + .with_cwd(cwd) + .with_env(env) + .with_container_command(vec!["sleep".to_owned(), "9001".to_owned()]) + .build()?; + + Ok(()) + } +} diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs new file mode 100644 index 000000000..0c9206d3d --- /dev/null +++ b/src/container/builder_impl.rs @@ -0,0 +1,107 @@ +use std::{path::PathBuf}; + +use anyhow::{Result}; +use nix::{ + sched, + unistd::{Gid, Uid}, +}; +use oci_spec::Spec; + +use crate::{cgroups, command::{Syscall, linux::LinuxCommand}, namespaces::Namespaces, notify_socket::NotifyListener, process::{Process, fork, setup_init_process}, rootless::{Rootless}, stdio::FileDescriptor, tty, utils}; + +use super::{Container, ContainerStatus}; + +pub(super) struct ContainerBuilderImpl{ + pub init: bool, + pub syscall: LinuxCommand, + pub use_systemd: bool, + pub container_id: String, + pub root_path: PathBuf, + pub container_dir: PathBuf, + pub spec: Spec, + pub rootfs: PathBuf, + pub pid_file: Option, + pub console_socket: Option, + pub rootless: Option, + pub notify_socket: NotifyListener, + pub container: Option, +} + +impl ContainerBuilderImpl { + pub fn create(&mut self) -> Result<()> { + if let Process::Parent(_) = self.run_container()? 
{ + std::process::exit(0); + } + + Ok(()) + } + + fn run_container(&mut self) -> Result { + prctl::set_dumpable(false).unwrap(); + + let linux = self.spec.linux.as_ref().unwrap(); + let namespaces: Namespaces = linux.namespaces.clone().into(); + + let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, &self.container_id); + let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path, self.use_systemd)?; + + // first fork, which creates process, which will later create actual container process + match fork::fork_first( + &self.pid_file, + &self.rootless, + linux, + self.container.as_ref(), + cmanager, + )? { + // In the parent process, which called run_container + Process::Parent(parent) => Ok(Process::Parent(parent)), + // in child process + Process::Child(child) => { + // set limits and namespaces to the process + for rlimit in self.spec.process.rlimits.iter() { + self.syscall.set_rlimit(rlimit)? + } + self.syscall.set_id(Uid::from_raw(0), Gid::from_raw(0))?; + + let without = sched::CloneFlags::CLONE_NEWUSER; + namespaces.apply_unshare(without)?; + + // set up tty if specified + if let Some(csocketfd) = &self.console_socket { + tty::setup_console(csocketfd)?; + } + + // set namespaces + namespaces.apply_setns()?; + + // fork second time, which will later create container + match fork::fork_init(child)? { + Process::Child(_child) => unreachable!(), + // This is actually the child process after fork + Process::Init(mut init) => { + // prepare process + setup_init_process(&self.spec, &self.syscall, self.rootfs.clone(), &namespaces)?; + init.ready()?; + self.notify_socket.wait_for_container_start()?; + // actually run the command / program to be run in container + let args: &Vec = &self.spec.process.args; + let envs: &Vec = &self.spec.process.env; + utils::do_exec(&args[0], args, envs)?; + + if let Some(container) = &self.container { + // the command / program is done executing + container + .refresh_state()? 
+ .update_status(ContainerStatus::Stopped) + .save()?; + } + + Ok(Process::Init(init)) + } + Process::Parent(_) => unreachable!(), + } + } + _ => unreachable!(), + } + } +} diff --git a/src/container/mod.rs b/src/container/mod.rs index bf3ca1982..9df944bfe 100644 --- a/src/container/mod.rs +++ b/src/container/mod.rs @@ -3,5 +3,7 @@ #[allow(clippy::module_inception)] mod container; mod state; +mod builder_impl; +pub mod builder; pub use container::Container; pub use state::{ContainerStatus, State}; diff --git a/src/create.rs b/src/create.rs index 6be323146..43d45a29a 100644 --- a/src/create.rs +++ b/src/create.rs @@ -1,3 +1,5 @@ +#![allow(unused_imports)] + //! Handles the creation of a new container use std::fs; use std::path::{Path, PathBuf}; @@ -9,6 +11,7 @@ use nix::sched; use nix::unistd; use nix::unistd::{Gid, Uid}; +use crate::container::builder::ContainerBuilder; use crate::container::{Container, ContainerStatus}; use crate::namespaces::Namespaces; use crate::notify_socket::NotifyListener; @@ -18,7 +21,7 @@ use crate::rootless::{lookup_map_binaries, should_use_rootless, Rootless}; use crate::stdio::FileDescriptor; use crate::tty; use crate::utils; -use crate::{capabilities, command::Command}; +use crate::{capabilities, command::Syscall}; use crate::{cgroups, rootless}; /// This is the main structure which stores various commandline options given by @@ -50,196 +53,25 @@ impl Create { &self, root_path: PathBuf, systemd_cgroup: bool, - command: impl Command, + command: impl Syscall, ) -> Result<()> { - // create a directory for the container to store state etc. 
- // if already present, return error - let bundle_canonicalized = fs::canonicalize(&self.bundle) - .unwrap_or_else(|_| panic!("failed to canonicalied {:?}", &self.bundle)); - let container_dir = root_path.join(&self.container_id); - log::debug!("container directory will be {:?}", container_dir); - if !container_dir.exists() { - fs::create_dir(&container_dir).unwrap(); - } else { - bail!("{} already exists", self.container_id) + let mut builder = ContainerBuilder::new_init(self.container_id.to_owned(), self.bundle.clone())?; + if let Some(pid_file) = &self.pid_file { + builder = builder.with_pid_file(pid_file); } - // change directory to the bundle directory, and load configuration, - // copy that to the container's directory - unistd::chdir(&self.bundle)?; - - let spec = oci_spec::Spec::load("config.json")?; - fs::copy("config.json", container_dir.join("config.json"))?; - log::debug!("spec: {:?}", spec); - - // convert path to absolute path, as relative path will be evaluated - // relative to where youki command is executed, and will be difficult to manipulate - let container_dir = fs::canonicalize(container_dir)?; - unistd::chdir(&*container_dir)?; - - log::debug!("{:?}", &container_dir); - - let container = Container::new( - &self.container_id, - ContainerStatus::Creating, - None, - bundle_canonicalized.to_str().unwrap(), - &container_dir, - )?; - container.save()?; - - let mut notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; - // convert path of root file system of the container to absolute path - let rootfs = fs::canonicalize(&spec.root.path)?; - - // if socket file path is given in commandline options, - // get file descriptors of console socket - let csocketfd = if let Some(console_socket) = &self.console_socket { - Some(tty::setup_console_socket(&container_dir, console_socket)?) 
- } else { - None - }; - - let process = run_container( - self.pid_file.as_ref(), - &mut notify_socket, - rootfs, - spec, - csocketfd, - systemd_cgroup, - container, - command, - )?; - // the run_container forks the process, so not after return if in - // parent process, exit ; as the work of creating the container is done - if let Process::Parent(_) = process { - process::exit(0); - } - // if in the child process after fork, then just return - Ok(()) - } -} - -/// Fork the process and actually start the container process -fn run_container>( - pid_file: Option

, - notify_socket: &mut NotifyListener, - rootfs: PathBuf, - spec: oci_spec::Spec, - csocketfd: Option, - systemd_cgroup: bool, - container: Container, - command: impl Command, -) -> Result { - // disable core dump for the process, check https://man7.org/linux/man-pages/man2/prctl.2.html for more information - prctl::set_dumpable(false).unwrap(); - - // get Linux specific section of OCI spec, - // refer https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md for more information - let linux = spec.linux.as_ref().unwrap(); - let namespaces: Namespaces = linux.namespaces.clone().into(); - - let rootless = if should_use_rootless() { - log::debug!("rootless container should be created"); - log::warn!( - "resource constraints and multi id mapping is unimplemented for rootless containers" - ); - rootless::validate(&spec)?; - let mut rootless = Rootless::from(linux); - if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? { - rootless.newuidmap = Some(uid_binary); - rootless.newgidmap = Some(gid_binary); + if let Some(console_socket) = &self.console_socket { + builder = builder.with_console_socket(console_socket); } - Some(rootless) - } else { - None - }; + + builder + .with_root_path(root_path) + .with_systemd(systemd_cgroup) + .build()?; - let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id()); - let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path, systemd_cgroup)?; - - // first fork, which creates process, which will later create actual container process - match fork::fork_first(pid_file, rootless, linux, &container, cmanager)? { - // In the parent process, which called run_container - Process::Parent(parent) => Ok(Process::Parent(parent)), - // in child process - Process::Child(child) => { - // set limits and namespaces to the process - for rlimit in spec.process.rlimits.iter() { - command.set_rlimit(rlimit)? 
- } - command.set_id(Uid::from_raw(0), Gid::from_raw(0))?; - - let without = sched::CloneFlags::CLONE_NEWUSER; - namespaces.apply_unshare(without)?; - - // set up tty if specified - if let Some(csocketfd) = csocketfd { - tty::setup_console(csocketfd)?; - } - - // set namespaces - namespaces.apply_setns()?; - - // fork second time, which will later create container - match fork::fork_init(child)? { - Process::Child(_child) => unreachable!(), - // This is actually the child process after fork - Process::Init(mut init) => { - // prepare process - setup_init_process(&spec, command, rootfs, &namespaces)?; - init.ready()?; - notify_socket.wait_for_container_start()?; - // actually run the command / program to be run in container - let args: &Vec = &spec.process.args; - let envs: &Vec = &spec.process.env; - utils::do_exec(&args[0], args, envs)?; - - // the command / program is done executing - container - .refresh_state()? - .update_status(ContainerStatus::Stopped) - .save()?; - - Ok(Process::Init(init)) - } - Process::Parent(_) => unreachable!(), - } - } - _ => unreachable!(), + Ok(()) } } -/// setup hostname, rootfs for the container process -fn setup_init_process( - spec: &oci_spec::Spec, - command: impl Command, - rootfs: PathBuf, - namespaces: &Namespaces, -) -> Result<()> { - let proc = &spec.process; - - command.set_hostname(spec.hostname.as_str())?; - if proc.no_new_privileges { - let _ = prctl::set_no_new_privileges(true); - } - - rootfs::prepare_rootfs( - &spec, - &rootfs, - namespaces - .clone_flags - .contains(sched::CloneFlags::CLONE_NEWUSER), - )?; - // change the root of filesystem of the process to the rootfs - command.pivot_rootfs(&rootfs)?; - - command.set_id(Uid::from_raw(proc.user.uid), Gid::from_raw(proc.user.gid))?; - capabilities::reset_effective(&command)?; - if let Some(caps) = &proc.capabilities { - capabilities::drop_privileges(&caps, &command)?; - } - Ok(()) -} diff --git a/src/process/fork.rs b/src/process/fork.rs index 4fe23ff76..ca2a4a67c 
100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -21,10 +21,10 @@ use crate::rootless::Rootless; /// Function to perform the first fork for in order to run the container process pub fn fork_first>( - pid_file: Option

, - rootless: Option, + pid_file: &Option

, + rootless: &Option, linux: &oci_spec::Linux, - container: &Container, + container: Option<&Container>, cmanager: Box, ) -> Result { // create new parent process structure @@ -73,12 +73,15 @@ pub fn fork_first>( cmanager.apply(&linux.resources.as_ref().unwrap())?; } - // update status and pid of the container process + if let Some(container) = container { + // update status and pid of the container process container - .update_status(ContainerStatus::Created) - .set_creator(nix::unistd::geteuid().as_raw()) - .set_pid(init_pid) - .save()?; + .update_status(ContainerStatus::Created) + .set_creator(nix::unistd::geteuid().as_raw()) + .set_pid(init_pid) + .save()?; + } + // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = pid_file { fs::write(&pid_file, format!("{}", child))?; diff --git a/src/process/init.rs b/src/process/init.rs index 42a015927..64f61f529 100644 --- a/src/process/init.rs +++ b/src/process/init.rs @@ -1,9 +1,10 @@ -use std::io::Write; +use std::{io::Write, path::PathBuf}; use anyhow::Result; use mio::unix::pipe::Sender; +use nix::{sched, unistd::{Gid, Uid}}; -use crate::process::message::Message; +use crate::{capabilities, command::Syscall, namespaces::Namespaces, process::message::Message, rootfs}; /// Contains sending end for pipe for the child process pub struct InitProcess { @@ -36,3 +37,36 @@ impl InitProcess { Ok(()) } } + +/// setup hostname, rootfs for the container process +pub fn setup_init_process( + spec: &oci_spec::Spec, + command: &impl Syscall, + rootfs: PathBuf, + namespaces: &Namespaces, +) -> Result<()> { + let proc = &spec.process; + + command.set_hostname(spec.hostname.as_str())?; + if proc.no_new_privileges { + let _ = prctl::set_no_new_privileges(true); + } + + rootfs::prepare_rootfs( + &spec, + &rootfs, + namespaces + .clone_flags + .contains(sched::CloneFlags::CLONE_NEWUSER), + )?; + + // change the root of filesystem of the process to the rootfs + command.pivot_rootfs(&rootfs)?; + + 
command.set_id(Uid::from_raw(proc.user.uid), Gid::from_raw(proc.user.gid))?; + capabilities::reset_effective(command)?; + if let Some(caps) = &proc.capabilities { + capabilities::drop_privileges(&caps, command)?; + } + Ok(()) +} diff --git a/src/process/mod.rs b/src/process/mod.rs index 99ff334e8..16c59a55f 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -10,7 +10,7 @@ mod child; mod init; mod parent; -pub use init::InitProcess; +pub use init::{InitProcess, setup_init_process}; /// Used to describe type of process after fork. /// Parent and child processes mean the same thing as in a normal fork call diff --git a/src/tty.rs b/src/tty.rs index 10b5de92b..a375e6ca1 100644 --- a/src/tty.rs +++ b/src/tty.rs @@ -43,7 +43,7 @@ pub fn setup_console_socket( Ok(csocketfd.into()) } -pub fn setup_console(console_fd: FileDescriptor) -> Result<()> { +pub fn setup_console(console_fd: &FileDescriptor) -> Result<()> { // You can also access pty master, but it is better to use the API. // ref. 
https://github.com/containerd/containerd/blob/261c107ffc4ff681bc73988f64e3f60c32233b37/vendor/github.com/containerd/go-runc/console.go#L139-L154 let openpty_result = nix::pty::openpty(None, None)?; @@ -137,7 +137,7 @@ mod tests { let lis = UnixListener::bind(Path::join(&testdir, "console-socket")); assert!(lis.is_ok()); let fd = setup_console_socket(&&rundir_path, &socket_path); - let status = setup_console(fd.unwrap()); + let status = setup_console(&fd.unwrap()); assert!(status.is_ok()); } } From 6c1de20c6f6598668b2e44d63a176290903e8a7d Mon Sep 17 00:00:00 2001 From: Rongxiang Song Date: Fri, 2 Jul 2021 15:40:14 +0800 Subject: [PATCH 40/70] add cgroup v2 pid controller --- src/cgroups/v2/pids.rs | 63 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/pids.rs b/src/cgroups/v2/pids.rs index 5306715de..27a0bb262 100644 --- a/src/cgroups/v2/pids.rs +++ b/src/cgroups/v2/pids.rs @@ -1,12 +1,71 @@ +use std::path::Path; + use anyhow::Result; +use crate::cgroups::common; + use super::controller::Controller; -use oci_spec::LinuxResources; +use oci_spec::{LinuxPids, LinuxResources}; pub struct Pids {} impl Controller for Pids { - fn apply(_: &LinuxResources, _: &std::path::Path) -> Result<()> { + fn apply(linux_resource: &LinuxResources, cgroup_root: &std::path::Path) -> Result<()> { + log::debug!("Apply pids cgroup v2 config"); + if let Some(pids) = &linux_resource.pids { + Self::apply(cgroup_root, pids)?; + } Ok(()) } } + +impl Pids { + fn apply(root_path: &Path, pids: &LinuxPids) -> Result<()> { + let limit = if pids.limit > 0 { + pids.limit.to_string() + } else { + "max".to_string() + }; + Ok(common::write_cgroup_file( + &root_path.join("pids.max"), + &limit, + )?) 
+ } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; + use oci_spec::LinuxPids; + + #[test] + fn test_set_pids() { + let pids_file_name = "pids.max"; + let tmp = create_temp_dir("v2_test_set_pids").expect("create temp directory for test"); + set_fixture(&tmp, pids_file_name, "1000").expect("Set fixture for 1000 pids"); + + let pids = LinuxPids { limit: 1000 }; + + Pids::apply(&tmp, &pids).expect("apply pids"); + let content = + std::fs::read_to_string(tmp.join(pids_file_name)).expect("Read pids contents"); + assert_eq!(pids.limit.to_string(), content); + } + + #[test] + fn test_set_pids_max() { + let pids_file_name = "pids.max"; + let tmp = create_temp_dir("v2_test_set_pids_max").expect("create temp directory for test"); + set_fixture(&tmp, pids_file_name, "0").expect("set fixture for 0 pids"); + + let pids = LinuxPids { limit: 0 }; + + Pids::apply(&tmp, &pids).expect("apply pids"); + + let content = + std::fs::read_to_string(tmp.join(pids_file_name)).expect("Read pids contents"); + assert_eq!("max".to_string(), content); + } +} From 123c2f688802fd1eff625b6d610ef3563693ed36 Mon Sep 17 00:00:00 2001 From: utam0k Date: Sun, 4 Jul 2021 18:26:39 +0900 Subject: [PATCH 41/70] make String to signal conversion more user friendly by using a Trait. 
--- src/delete.rs | 8 ++---- src/kill.rs | 4 +-- src/signal.rs | 78 +++++++++++++++++++++++++++------------------------ 3 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/delete.rs b/src/delete.rs index 30cf2f54a..2ddf0dc9e 100644 --- a/src/delete.rs +++ b/src/delete.rs @@ -4,13 +4,11 @@ use std::path::PathBuf; use anyhow::{bail, Result}; use clap::Clap; +use nix::sys::signal::Signal; use crate::cgroups; +use crate::container::{Container, ContainerStatus}; use crate::utils; -use crate::{ - container::{Container, ContainerStatus}, - signal, -}; use nix::sys::signal as nix_signal; #[derive(Clap, Debug)] @@ -35,7 +33,7 @@ impl Delete { log::debug!("load the container from {:?}", container_root); let mut container = Container::load(container_root)?.refresh_status()?; if container.can_kill() && self.force { - let sig = signal::from_str("SIGKILL")?; + let sig = Signal::SIGKILL; log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); nix_signal::kill(container.pid().unwrap(), sig)?; container = container.update_status(ContainerStatus::Stopped); diff --git a/src/kill.rs b/src/kill.rs index 9c0eebb2d..734d65b17 100644 --- a/src/kill.rs +++ b/src/kill.rs @@ -6,7 +6,7 @@ use nix::sys::signal as nix_signal; use crate::{ container::{Container, ContainerStatus}, - signal, + signal::ToSignal, }; #[derive(Clap, Debug)] @@ -30,7 +30,7 @@ impl Kill { // it might be possible that kill is invoked on a already stopped container etc. 
let container = Container::load(container_root)?.refresh_status()?; if container.can_kill() { - let sig = signal::from_str(self.signal.as_str())?; + let sig = self.signal.to_signal()?; log::debug!("kill signal {} to {}", sig, container.pid().unwrap()); nix_signal::kill(container.pid().unwrap(), sig)?; container.update_status(ContainerStatus::Stopped).save()?; diff --git a/src/signal.rs b/src/signal.rs index 39ddbb26d..b988f3c75 100644 --- a/src/signal.rs +++ b/src/signal.rs @@ -3,40 +3,46 @@ use anyhow::{bail, Result}; use nix::sys::signal::Signal; -pub fn from_str(signal: &str) -> Result { - use Signal::*; - Ok(match signal.to_ascii_uppercase().as_str() { - "1" | "HUP" | "SIGHUP" => Signal::SIGHUP, - "2" | "INT" | "SIGINT" => Signal::SIGINT, - "3" | "QUIT" | "SIGQUIT" => Signal::SIGQUIT, - "4" | "ILL" | "SIGILL" => Signal::SIGILL, - "5" | "BUS" | "SIGBUS" => Signal::SIGBUS, - "6" | "ABRT" | "IOT" | "SIGABRT" | "SIGIOT" => Signal::SIGABRT, - "7" | "TRAP" | "SIGTRAP" => Signal::SIGTRAP, - "8" | "FPE" | "SIGFPE" => Signal::SIGFPE, - "9" | "KILL" | "SIGKILL" => Signal::SIGKILL, - "10" | "USR1" | "SIGUSR1" => Signal::SIGUSR1, - "11" | "SEGV" | "SIGSEGV" => SIGSEGV, - "12" | "USR2" | "SIGUSR2" => SIGUSR2, - "13" | "PIPE" | "SIGPIPE" => SIGPIPE, - "14" | "ALRM" | "SIGALRM" => SIGALRM, - "15" | "TERM" | "SIGTERM" => SIGTERM, - "16" | "STKFLT" | "SIGSTKFLT" => SIGSTKFLT, - "17" | "CHLD" | "SIGCHLD" => SIGCHLD, - "18" | "CONT" | "SIGCONT" => SIGCONT, - "19" | "STOP" | "SIGSTOP" => SIGSTOP, - "20" | "TSTP" | "SIGTSTP" => SIGTSTP, - "21" | "TTIN" | "SIGTTIN" => SIGTTIN, - "22" | "TTOU" | "SIGTTOU" => SIGTTOU, - "23" | "URG" | "SIGURG" => SIGURG, - "24" | "XCPU" | "SIGXCPU" => SIGXCPU, - "25" | "XFSZ" | "SIGXFSZ" => SIGXFSZ, - "26" | "VTALRM" | "SIGVTALRM" => SIGVTALRM, - "27" | "PROF" | "SIGPROF" => SIGPROF, - "28" | "WINCH" | "SIGWINCH" => SIGWINCH, - "29" | "IO" | "SIGIO" => SIGIO, - "30" | "PWR" | "SIGPWR" => SIGPWR, - "31" | "SYS" | "SIGSYS" => SIGSYS, - _ => bail! 
{"{} is not a valid signal", signal}, - }) +pub trait ToSignal { + fn to_signal(&self) -> Result; +} + +impl ToSignal for String { + fn to_signal(&self) -> Result { + use Signal::*; + Ok(match self.to_ascii_uppercase().as_str() { + "1" | "HUP" | "SIGHUP" => Signal::SIGHUP, + "2" | "INT" | "SIGINT" => Signal::SIGINT, + "3" | "QUIT" | "SIGQUIT" => Signal::SIGQUIT, + "4" | "ILL" | "SIGILL" => Signal::SIGILL, + "5" | "BUS" | "SIGBUS" => Signal::SIGBUS, + "6" | "ABRT" | "IOT" | "SIGABRT" | "SIGIOT" => Signal::SIGABRT, + "7" | "TRAP" | "SIGTRAP" => Signal::SIGTRAP, + "8" | "FPE" | "SIGFPE" => Signal::SIGFPE, + "9" | "KILL" | "SIGKILL" => Signal::SIGKILL, + "10" | "USR1" | "SIGUSR1" => Signal::SIGUSR1, + "11" | "SEGV" | "SIGSEGV" => SIGSEGV, + "12" | "USR2" | "SIGUSR2" => SIGUSR2, + "13" | "PIPE" | "SIGPIPE" => SIGPIPE, + "14" | "ALRM" | "SIGALRM" => SIGALRM, + "15" | "TERM" | "SIGTERM" => SIGTERM, + "16" | "STKFLT" | "SIGSTKFLT" => SIGSTKFLT, + "17" | "CHLD" | "SIGCHLD" => SIGCHLD, + "18" | "CONT" | "SIGCONT" => SIGCONT, + "19" | "STOP" | "SIGSTOP" => SIGSTOP, + "20" | "TSTP" | "SIGTSTP" => SIGTSTP, + "21" | "TTIN" | "SIGTTIN" => SIGTTIN, + "22" | "TTOU" | "SIGTTOU" => SIGTTOU, + "23" | "URG" | "SIGURG" => SIGURG, + "24" | "XCPU" | "SIGXCPU" => SIGXCPU, + "25" | "XFSZ" | "SIGXFSZ" => SIGXFSZ, + "26" | "VTALRM" | "SIGVTALRM" => SIGVTALRM, + "27" | "PROF" | "SIGPROF" => SIGPROF, + "28" | "WINCH" | "SIGWINCH" => SIGWINCH, + "29" | "IO" | "SIGIO" => SIGIO, + "30" | "PWR" | "SIGPWR" => SIGPWR, + "31" | "SYS" | "SIGSYS" => SIGSYS, + _ => bail! {"{} is not a valid signal", self}, + }) + } } From 6e5eee04a481f383dda389315a0a998b53a37e10 Mon Sep 17 00:00:00 2001 From: utam0k Date: Sun, 4 Jul 2021 19:00:44 +0900 Subject: [PATCH 42/70] add tests for the signal. 
--- src/signal.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/src/signal.rs b/src/signal.rs index b988f3c75..30b151f13 100644 --- a/src/signal.rs +++ b/src/signal.rs @@ -11,16 +11,16 @@ impl ToSignal for String { fn to_signal(&self) -> Result { use Signal::*; Ok(match self.to_ascii_uppercase().as_str() { - "1" | "HUP" | "SIGHUP" => Signal::SIGHUP, - "2" | "INT" | "SIGINT" => Signal::SIGINT, - "3" | "QUIT" | "SIGQUIT" => Signal::SIGQUIT, - "4" | "ILL" | "SIGILL" => Signal::SIGILL, - "5" | "BUS" | "SIGBUS" => Signal::SIGBUS, - "6" | "ABRT" | "IOT" | "SIGABRT" | "SIGIOT" => Signal::SIGABRT, - "7" | "TRAP" | "SIGTRAP" => Signal::SIGTRAP, - "8" | "FPE" | "SIGFPE" => Signal::SIGFPE, - "9" | "KILL" | "SIGKILL" => Signal::SIGKILL, - "10" | "USR1" | "SIGUSR1" => Signal::SIGUSR1, + "1" | "HUP" | "SIGHUP" => SIGHUP, + "2" | "INT" | "SIGINT" => SIGINT, + "3" | "QUIT" | "SIGQUIT" => SIGQUIT, + "4" | "ILL" | "SIGILL" => SIGILL, + "5" | "BUS" | "SIGBUS" => SIGBUS, + "6" | "ABRT" | "IOT" | "SIGABRT" | "SIGIOT" => SIGABRT, + "7" | "TRAP" | "SIGTRAP" => SIGTRAP, + "8" | "FPE" | "SIGFPE" => SIGFPE, + "9" | "KILL" | "SIGKILL" => SIGKILL, + "10" | "USR1" | "SIGUSR1" => SIGUSR1, "11" | "SEGV" | "SIGSEGV" => SIGSEGV, "12" | "USR2" | "SIGUSR2" => SIGUSR2, "13" | "PIPE" | "SIGPIPE" => SIGPIPE, @@ -46,3 +46,56 @@ impl ToSignal for String { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use nix::sys::signal::Signal::*; + use std::collections::HashMap; + + #[test] + fn test_conversion_from_string() { + let mut test_sets = HashMap::new(); + test_sets.insert(SIGHUP, vec!["1", "HUP", "SIGHUP"]); + test_sets.insert(SIGINT, vec!["2", "INT", "SIGINT"]); + test_sets.insert(SIGQUIT, vec!["3", "QUIT", "SIGQUIT"]); + test_sets.insert(SIGILL, vec!["4", "ILL", "SIGILL"]); + test_sets.insert(SIGBUS, vec!["5", "BUS", "SIGBUS"]); + test_sets.insert(SIGABRT, vec!["6", "ABRT", "IOT", "SIGABRT", "SIGIOT"]); + 
test_sets.insert(SIGTRAP, vec!["7", "TRAP", "SIGTRAP"]); + test_sets.insert(SIGFPE, vec!["8", "FPE", "SIGFPE"]); + test_sets.insert(SIGKILL, vec!["9", "KILL", "SIGKILL"]); + test_sets.insert(SIGUSR1, vec!["10", "USR1", "SIGUSR1"]); + test_sets.insert(SIGSEGV, vec!["11", "SEGV", "SIGSEGV"]); + test_sets.insert(SIGUSR2, vec!["12", "USR2", "SIGUSR2"]); + test_sets.insert(SIGPIPE, vec!["13", "PIPE", "SIGPIPE"]); + test_sets.insert(SIGALRM, vec!["14", "ALRM", "SIGALRM"]); + test_sets.insert(SIGTERM, vec!["15", "TERM", "SIGTERM"]); + test_sets.insert(SIGSTKFLT, vec!["16", "STKFLT", "SIGSTKFLT"]); + test_sets.insert(SIGCHLD, vec!["17", "CHLD", "SIGCHLD"]); + test_sets.insert(SIGCONT, vec!["18", "CONT", "SIGCONT"]); + test_sets.insert(SIGSTOP, vec!["19", "STOP", "SIGSTOP"]); + test_sets.insert(SIGTSTP, vec!["20", "TSTP", "SIGTSTP"]); + test_sets.insert(SIGTTIN, vec!["21", "TTIN", "SIGTTIN"]); + test_sets.insert(SIGTTOU, vec!["22", "TTOU", "SIGTTOU"]); + test_sets.insert(SIGURG, vec!["23", "URG", "SIGURG"]); + test_sets.insert(SIGXCPU, vec!["24", "XCPU", "SIGXCPU"]); + test_sets.insert(SIGXFSZ, vec!["25", "XFSZ", "SIGXFSZ"]); + test_sets.insert(SIGVTALRM, vec!["26", "VTALRM", "SIGVTALRM"]); + test_sets.insert(SIGPROF, vec!["27", "PROF", "SIGPROF"]); + test_sets.insert(SIGWINCH, vec!["28", "WINCH", "SIGWINCH"]); + test_sets.insert(SIGIO, vec!["29", "IO", "SIGIO"]); + test_sets.insert(SIGPWR, vec!["30", "PWR", "SIGPWR"]); + test_sets.insert(SIGSYS, vec!["31", "SYS", "SIGSYS"]); + for (signal, strings) in test_sets { + for s in strings { + assert_eq!(signal, s.to_string().to_signal().unwrap()); + } + } + } + + #[test] + fn test_conversion_from_string_should_be_failed() { + assert!("invalid".to_string().to_signal().is_err()) + } +} From 53c9b73bcccf7398d5ce79938d3b489cb1631ca3 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:44:07 +0200 Subject: [PATCH 43/70] Split container builder into dedicated init and tenant 
builders The current monolithic builder provides options that should only be called during init and not when creating a tenant and vice versa. This puts the burden on the user of the builder to know which methods are safe to call. Now the ContainerBuilder can be used to specify options that are common to both scenarios and afterwards as_init/as_tenant can be called to provide scenario specific options. This also simplifies the whole "if init then else" branching logic during container build. --- src/container/builder.rs | 207 +++++--------------------------- src/container/builder_impl.rs | 30 +++-- src/container/init_builder.rs | 114 ++++++++++++++++++ src/container/mod.rs | 6 +- src/container/tenant_builder.rs | 98 +++++++++++++++ src/create.rs | 44 ++----- src/main.rs | 4 +- src/rootless.rs | 22 ++++ 8 files changed, 299 insertions(+), 226 deletions(-) create mode 100644 src/container/init_builder.rs create mode 100644 src/container/tenant_builder.rs diff --git a/src/container/builder.rs b/src/container/builder.rs index 5e5b66319..c75834cdb 100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -1,78 +1,40 @@ -#![allow(unused_imports, unused_variables)] +use crate::command::linux::LinuxCommand; +use std::path::PathBuf; -use std::{ - collections::HashMap, - fs, - path::{Path, PathBuf}, -}; - -use anyhow::{bail, Result}; -use nix::unistd; -use oci_spec::Spec; - -use crate::{command::{Syscall, linux::{self, LinuxCommand}, syscall::create_syscall}, notify_socket::NotifyListener, rootless::{self, lookup_map_binaries, should_use_rootless, Rootless}, tty, utils}; +use super::{init_builder::InitContainerBuilder, tenant_builder::TenantContainerBuilder}; +pub struct ContainerBuilder { + pub(super) container_id: String, -use super::{builder_impl::ContainerBuilderImpl, Container, ContainerStatus}; + pub(super) root_path: PathBuf, -pub struct ContainerBuilder { - // defaults - /// - init: bool, - /// - use_systemd: bool, - /// - syscall: LinuxCommand, - //// 
- root_path: PathBuf, + pub(super) syscall: LinuxCommand, - // required - /// - container_id: String, - /// - bundle: Option, + pub(super) pid_file: Option, - // optional - /// - pid_file: Option, - /// - console_socket: Option, + pub(super) console_socket: Option, } impl ContainerBuilder { - pub fn new_init>(container_id: String, bundle: P) -> Result { - let bundle = Some(fs::canonicalize(bundle.into())?); - let root_path = PathBuf::from("/run/youki"); - - Ok(Self { - init: true, - use_systemd: true, - syscall: LinuxCommand, - root_path, - container_id, - bundle, - pid_file: None, - console_socket: None, - }) - } - - pub fn new_tenant(container_id: String) -> Self { + pub fn new(container_id: String) -> Self { let root_path = PathBuf::from("/run/youki"); Self { - init: false, - use_systemd: true, - syscall: LinuxCommand, - root_path, container_id, - bundle: None, + root_path, + syscall: LinuxCommand, pid_file: None, console_socket: None, } } - pub fn with_systemd(mut self, should_use: bool) -> Self { - self.use_systemd = should_use; - self + #[allow(clippy::wrong_self_convention)] + pub fn as_tenant(self) -> TenantContainerBuilder { + TenantContainerBuilder::new(self) + } + + #[allow(clippy::wrong_self_convention)] + pub fn as_init>(self, bundle: P) -> InitContainerBuilder { + InitContainerBuilder::new(self, bundle.into()) } pub fn with_root_path>(mut self, path: P) -> Self { @@ -89,129 +51,14 @@ impl ContainerBuilder { self.console_socket = Some(path.into()); self } - - pub fn with_env(mut self, env: HashMap) -> Self { - todo!(); - } - - pub fn with_cwd>(mut self, path: P) -> Self { - todo!(); - } - - pub fn with_container_command(mut self, command: Vec) -> Self { - todo!(); - } - - pub fn build(mut self) -> Result<()> { - let container_dir = self.prepare_container_dir()?; - let spec = self.load_and_safeguard_spec(&container_dir)?; - unistd::chdir(&*container_dir)?; - - let container = if self.init { - Some(self.create_container_state(&container_dir)?) 
- } else { - None - }; - - let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; - // convert path of root file system of the container to absolute path - let rootfs = fs::canonicalize(&spec.root.path)?; - - // if socket file path is given in commandline options, - // get file descriptors of console socket - let csocketfd = if let Some(console_socket) = &self.console_socket { - Some(tty::setup_console_socket(&container_dir, console_socket)?) - } else { - None - }; - - let rootless = self.is_rootless_required(&spec)?; - - let mut builder_impl = ContainerBuilderImpl { - init: self.init, - use_systemd: self.use_systemd, - root_path: self.root_path, - container_id: self.container_id, - pid_file: self.pid_file, - syscall: self.syscall, - console_socket: csocketfd, - rootless, - container_dir, - spec, - rootfs, - notify_socket, - container, - }; - - builder_impl.create()?; - Ok(()) - } - - fn prepare_container_dir(&mut self) -> Result { - let container_dir = self.root_path.join(&self.container_id); - log::debug!("container directory will be {:?}", container_dir); - - match (self.init, container_dir.exists()) { - (true, true) => bail!("container {} already exists", self.container_id), - (true, false) => utils::create_dir_all(&container_dir)?, - (false, true) => {} - (false, false) => bail!("container {} does not exist", self.container_id), - } - - Ok(container_dir) - } - - fn load_and_safeguard_spec(&self, container_dir: &Path) -> Result { - let spec_path = if self.init { - let config_path = self.bundle.as_ref().unwrap().join("config.json"); - fs::copy(&config_path, container_dir.join("config.json"))?; - config_path - } else { - container_dir.join("config.json") - }; - - let spec = oci_spec::Spec::load(spec_path)?; - Ok(spec) - } - - fn is_rootless_required(&self, spec: &Spec) -> Result> { - let linux = spec.linux.as_ref().unwrap(); - - let rootless = if should_use_rootless() { - log::debug!("rootless container should be created"); - log::warn!( - 
"resource constraints and multi id mapping is unimplemented for rootless containers" - ); - rootless::validate(spec)?; - let mut rootless = Rootless::from(linux); - if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? { - rootless.newuidmap = Some(uid_binary); - rootless.newgidmap = Some(gid_binary); - } - Some(rootless) - } else { - None - }; - - Ok(rootless) - } - - fn create_container_state(&self, container_dir: &Path) -> Result { - let container = Container::new( - &self.container_id, - ContainerStatus::Creating, - None, - self.bundle.as_ref().unwrap().to_str().unwrap(), - &container_dir, - )?; - container.save()?; - Ok(container) - } } #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::*; + use anyhow::Result; // required values (must be specified in new...) // - create @@ -250,11 +97,12 @@ mod tests { let console_socket = PathBuf::from(""); let root_path = PathBuf::from(""); - let container = ContainerBuilder::new_init(id, bundle)? + let container = ContainerBuilder::new(id) .with_pid_file(pid_file) // optional .with_console_socket(console_socket) //optional - .with_systemd(false) // overwrite default .with_root_path(root_path) // overwrite default + .as_init(bundle) + .with_systemd(false) .build()?; Ok(()) @@ -268,9 +116,10 @@ mod tests { let cwd = PathBuf::from(""); let env = HashMap::new(); - let container = ContainerBuilder::new_tenant(id) + let container = ContainerBuilder::new(id) .with_pid_file(pid_file) .with_console_socket(console_socket) + .as_tenant() .with_cwd(cwd) .with_env(env) .with_container_command(vec!["sleep".to_owned(), "9001".to_owned()]) diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index 0c9206d3d..b797ba8fa 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -1,17 +1,26 @@ -use std::{path::PathBuf}; +use std::path::PathBuf; -use anyhow::{Result}; +use anyhow::Result; use nix::{ sched, unistd::{Gid, Uid}, }; use oci_spec::Spec; -use 
crate::{cgroups, command::{Syscall, linux::LinuxCommand}, namespaces::Namespaces, notify_socket::NotifyListener, process::{Process, fork, setup_init_process}, rootless::{Rootless}, stdio::FileDescriptor, tty, utils}; +use crate::{ + cgroups, + command::{linux::LinuxCommand, Syscall}, + namespaces::Namespaces, + notify_socket::NotifyListener, + process::{fork, setup_init_process, Process}, + rootless::Rootless, + stdio::FileDescriptor, + tty, utils, +}; use super::{Container, ContainerStatus}; -pub(super) struct ContainerBuilderImpl{ +pub(super) struct ContainerBuilderImpl { pub init: bool, pub syscall: LinuxCommand, pub use_systemd: bool, @@ -80,7 +89,12 @@ impl ContainerBuilderImpl { // This is actually the child process after fork Process::Init(mut init) => { // prepare process - setup_init_process(&self.spec, &self.syscall, self.rootfs.clone(), &namespaces)?; + setup_init_process( + &self.spec, + &self.syscall, + self.rootfs.clone(), + &namespaces, + )?; init.ready()?; self.notify_socket.wait_for_container_start()?; // actually run the command / program to be run in container @@ -91,9 +105,9 @@ impl ContainerBuilderImpl { if let Some(container) = &self.container { // the command / program is done executing container - .refresh_state()? - .update_status(ContainerStatus::Stopped) - .save()?; + .refresh_state()? 
+ .update_status(ContainerStatus::Stopped) + .save()?; } Ok(Process::Init(init)) diff --git a/src/container/init_builder.rs b/src/container/init_builder.rs new file mode 100644 index 000000000..363d5d867 --- /dev/null +++ b/src/container/init_builder.rs @@ -0,0 +1,114 @@ +use anyhow::{bail, Context, Result}; +use nix::unistd; +use oci_spec::Spec; +use rootless::detect_rootless; +use std::{ + fs, + path::{Path, PathBuf}, +}; + +use crate::{notify_socket::NotifyListener, rootless, tty, utils}; + +use super::{ + builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container, ContainerStatus, +}; + +pub struct InitContainerBuilder { + base: ContainerBuilder, + bundle: PathBuf, + use_systemd: bool, +} + +impl InitContainerBuilder { + pub(super) fn new(builder: ContainerBuilder, bundle: PathBuf) -> Self { + Self { + base: builder, + bundle, + use_systemd: true, + } + } + + pub fn with_systemd(mut self, should_use: bool) -> Self { + self.use_systemd = should_use; + self + } + + pub fn build(self) -> Result<()> { + let container_dir = self.create_container_dir()?; + let spec = self.load_and_safeguard_spec(&container_dir)?; + + unistd::chdir(&*container_dir)?; + let container_state = self.create_container_state(&container_dir)?; + + let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; + // convert path of root file system of the container to absolute path + let rootfs = fs::canonicalize(&spec.root.path)?; + + // if socket file path is given in commandline options, + // get file descriptors of console socket + let csocketfd = if let Some(console_socket) = &self.base.console_socket { + Some(tty::setup_console_socket(&container_dir, console_socket)?) 
+ } else { + None + }; + + let rootless = detect_rootless(&spec)?; + + let mut builder_impl = ContainerBuilderImpl { + init: true, + syscall: self.base.syscall, + container_id: self.base.container_id, + root_path: self.base.root_path, + pid_file: self.base.pid_file, + console_socket: csocketfd, + use_systemd: self.use_systemd, + container_dir, + spec, + rootfs, + rootless, + notify_socket, + container: Some(container_state), + }; + + builder_impl.create()?; + Ok(()) + } + + fn create_container_dir(&self) -> Result { + let container_dir = self.base.root_path.join(&self.base.container_id); + log::debug!("container directory will be {:?}", container_dir); + + if container_dir.exists() { + bail!("container {} already exists", self.base.container_id); + } + + utils::create_dir_all(&container_dir)?; + Ok(container_dir) + } + + fn load_and_safeguard_spec(&self, container_dir: &Path) -> Result { + let source_spec_path = self.bundle.join("config.json"); + let target_spec_path = container_dir.join("config.json"); + fs::copy(&source_spec_path, &target_spec_path).with_context(|| { + format!( + "failed to copy {:?} to {:?}", + source_spec_path, target_spec_path + ) + })?; + + let spec = oci_spec::Spec::load(&target_spec_path)?; + Ok(spec) + } + + fn create_container_state(&self, container_dir: &Path) -> Result { + let container = Container::new( + &self.base.container_id, + ContainerStatus::Creating, + None, + self.bundle.as_path().to_str().unwrap(), + &container_dir, + )?; + container.save()?; + Ok(container) + } +} diff --git a/src/container/mod.rs b/src/container/mod.rs index 9df944bfe..59873ad22 100644 --- a/src/container/mod.rs +++ b/src/container/mod.rs @@ -1,9 +1,11 @@ //! 
Container management +pub mod builder; +mod builder_impl; #[allow(clippy::module_inception)] mod container; +pub mod init_builder; mod state; -mod builder_impl; -pub mod builder; +pub mod tenant_builder; pub use container::Container; pub use state::{ContainerStatus, State}; diff --git a/src/container/tenant_builder.rs b/src/container/tenant_builder.rs new file mode 100644 index 000000000..fd48f8b40 --- /dev/null +++ b/src/container/tenant_builder.rs @@ -0,0 +1,98 @@ +use anyhow::{bail, Result}; +use oci_spec::Spec; +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use crate::{notify_socket::NotifyListener, rootless::detect_rootless, tty}; + +use super::{builder::ContainerBuilder, builder_impl::ContainerBuilderImpl}; + +pub struct TenantContainerBuilder { + base: ContainerBuilder, + env: HashMap, + cwd: Option, + command: Vec, +} + +impl TenantContainerBuilder { + pub(super) fn new(builder: ContainerBuilder) -> Self { + Self { + base: builder, + env: HashMap::new(), + cwd: None, + command: vec!["sh".to_owned()], + } + } + + pub fn with_env(mut self, env: HashMap) -> Self { + self.env = env; + self + } + + pub fn with_cwd>(mut self, path: P) -> Self { + self.cwd = Some(path.into()); + self + } + + pub fn with_container_command(mut self, command: Vec) -> Self { + self.command = command; + self + } + + pub fn build(self) -> Result<()> { + let container_dir = self.lookup_container_dir()?; + let spec = self.load_init_spec(&container_dir)?; + + let notify_socket: NotifyListener = NotifyListener::new(&container_dir)?; + // convert path of root file system of the container to absolute path + let rootfs = fs::canonicalize(&spec.root.path)?; + + // if socket file path is given in commandline options, + // get file descriptors of console socket + let csocketfd = if let Some(console_socket) = &self.base.console_socket { + Some(tty::setup_console_socket(&container_dir, console_socket)?) 
+ } else { + None + }; + + let rootless = detect_rootless(&spec)?; + + let mut builder_impl = ContainerBuilderImpl { + init: false, + syscall: self.base.syscall, + container_id: self.base.container_id, + root_path: self.base.root_path, + pid_file: self.base.pid_file, + console_socket: csocketfd, + use_systemd: false, + container_dir, + spec, + rootfs, + rootless, + notify_socket, + container: None, + }; + + builder_impl.create()?; + Ok(()) + } + + fn lookup_container_dir(&self) -> Result { + let container_dir = self.base.root_path.join(&self.base.container_id); + if !container_dir.exists() { + bail!("container {} does not exist", self.base.container_id); + } + + Ok(container_dir) + } + + fn load_init_spec(&self, container_dir: &Path) -> Result { + let spec_path = container_dir.join("config.json"); + + let spec = oci_spec::Spec::load(spec_path)?; + Ok(spec) + } +} diff --git a/src/create.rs b/src/create.rs index 43d45a29a..d0f6b8471 100644 --- a/src/create.rs +++ b/src/create.rs @@ -1,28 +1,9 @@ -#![allow(unused_imports)] - //! Handles the creation of a new container -use std::fs; -use std::path::{Path, PathBuf}; -use std::process; - -use anyhow::{bail, Result}; +use anyhow::Result; use clap::Clap; -use nix::sched; -use nix::unistd; -use nix::unistd::{Gid, Uid}; +use std::path::PathBuf; use crate::container::builder::ContainerBuilder; -use crate::container::{Container, ContainerStatus}; -use crate::namespaces::Namespaces; -use crate::notify_socket::NotifyListener; -use crate::process::{fork, Process}; -use crate::rootfs; -use crate::rootless::{lookup_map_binaries, should_use_rootless, Rootless}; -use crate::stdio::FileDescriptor; -use crate::tty; -use crate::utils; -use crate::{capabilities, command::Syscall}; -use crate::{cgroups, rootless}; /// This is the main structure which stores various commandline options given by /// high-level container runtime @@ -49,14 +30,8 @@ pub struct Create { // associated with it like any other process. 
impl Create { /// Starts a new container process - pub fn exec( - &self, - root_path: PathBuf, - systemd_cgroup: bool, - command: impl Syscall, - ) -> Result<()> { - - let mut builder = ContainerBuilder::new_init(self.container_id.to_owned(), self.bundle.clone())?; + pub fn exec(&self, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { + let mut builder = ContainerBuilder::new(self.container_id.clone()); if let Some(pid_file) = &self.pid_file { builder = builder.with_pid_file(pid_file); } @@ -64,14 +39,13 @@ impl Create { if let Some(console_socket) = &self.console_socket { builder = builder.with_console_socket(console_socket); } - + builder - .with_root_path(root_path) - .with_systemd(systemd_cgroup) - .build()?; + .with_root_path(root_path) + .as_init(&self.bundle) + .with_systemd(systemd_cgroup) + .build()?; Ok(()) } } - - diff --git a/src/main.rs b/src/main.rs index b244d09f5..df57eecfb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,6 @@ use std::path::PathBuf; use anyhow::Result; use clap::Clap; -use youki::command::linux::LinuxCommand; use youki::create; use youki::delete; use youki::info; @@ -18,6 +17,7 @@ use youki::rootless::should_use_rootless; use youki::start; use youki::state; + /// High-level commandline option definition /// This takes global options as well as individual commands as specified in [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) /// Also check [runc commandline documentation](https://github.com/opencontainers/runc/blob/master/man/runc.8.md) for more explanation @@ -78,7 +78,7 @@ fn main() -> Result<()> { let systemd_cgroup = opts.systemd_cgroup; match opts.subcmd { - SubCommand::Create(create) => create.exec(root_path, systemd_cgroup, LinuxCommand), + SubCommand::Create(create) => create.exec(root_path, systemd_cgroup), SubCommand::Start(start) => start.exec(root_path), SubCommand::Kill(kill) => kill.exec(root_path), SubCommand::Delete(delete) => delete.exec(root_path, 
systemd_cgroup), diff --git a/src/rootless.rs b/src/rootless.rs index 799b39fa8..3841260da 100644 --- a/src/rootless.rs +++ b/src/rootless.rs @@ -29,6 +29,28 @@ impl From<&Linux> for Rootless { } } +pub fn detect_rootless(spec: &Spec) -> Result> { + let linux = spec.linux.as_ref().unwrap(); + + let rootless = if should_use_rootless() { + log::debug!("rootless container should be created"); + log::warn!( + "resource constraints and multi id mapping is unimplemented for rootless containers" + ); + validate(spec)?; + let mut rootless = Rootless::from(linux); + if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? { + rootless.newuidmap = Some(uid_binary); + rootless.newgidmap = Some(gid_binary); + } + Some(rootless) + } else { + None + }; + + Ok(rootless) +} + /// Checks if rootless mode should be used pub fn should_use_rootless() -> bool { if !nix::unistd::geteuid().is_root() { From 5146d0ee5ebe93ce61e509fb627185a9f0c0212c Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 5 Jul 2021 19:40:35 +0200 Subject: [PATCH 44/70] Add documentation --- src/container/builder.rs | 76 +++++++++++++++++++++++++++++++++ src/container/init_builder.rs | 5 +++ src/container/tenant_builder.rs | 9 ++++ 3 files changed, 90 insertions(+) diff --git a/src/container/builder.rs b/src/container/builder.rs index c75834cdb..472a6e455 100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -14,7 +14,32 @@ pub struct ContainerBuilder { pub(super) console_socket: Option, } +/// Builder that can be used to configure the common properties of +/// either a init or a tenant container +/// +/// # Example +/// +/// ```no_run +/// use youki::container::builder::ContainerBuilder; +/// +/// ContainerBuilder::new("74f1a4cb3801".to_owned()) +/// .with_root_path("/run/containers/youki") +/// .with_pid_file("/var/run/docker.pid") +/// .with_console_socket("/var/run/docker/sock.tty") +/// .as_init("/var/run/docker/bundle") +/// .build(); 
+/// ``` impl ContainerBuilder { + /// Generates the base configuration for a container which can be + /// transformed into either a init container or a tenant container + /// + /// # Example + /// + /// ```no_run + /// use youki::container::builder::ContainerBuilder; + /// + /// let builder = ContainerBuilder::new("74f1a4cb3801".to_owned()); + /// ``` pub fn new(container_id: String) -> Self { let root_path = PathBuf::from("/run/youki"); @@ -27,26 +52,77 @@ impl ContainerBuilder { } } + /// Transforms this builder into a tenant builder + /// # Example + /// + /// ```no_run + /// # use youki::container::builder::ContainerBuilder; + /// + /// ContainerBuilder::new("74f1a4cb3801".to_owned()) + /// .as_tenant() + /// .with_container_command(vec!["sleep".to_owned(), "9001".to_owned()]) + /// .build(); + /// ``` #[allow(clippy::wrong_self_convention)] pub fn as_tenant(self) -> TenantContainerBuilder { TenantContainerBuilder::new(self) } + /// Transforms this builder into an init builder + /// # Example + /// + /// ```no_run + /// # use youki::container::builder::ContainerBuilder; + /// + /// ContainerBuilder::new("74f1a4cb3801".to_owned()) + /// .as_init("/var/run/docker/bundle") + /// .with_systemd(false) + /// .build(); + /// ``` #[allow(clippy::wrong_self_convention)] pub fn as_init>(self, bundle: P) -> InitContainerBuilder { InitContainerBuilder::new(self, bundle.into()) } + /// Sets the root path which will be used to store the container state + /// # Example + /// + /// ```no_run + /// # use youki::container::builder::ContainerBuilder; + /// + /// ContainerBuilder::new("74f1a4cb3801".to_owned()) + /// .with_root_path("/run/containers/youki"); + /// ``` pub fn with_root_path>(mut self, path: P) -> Self { self.root_path = path.into(); self } + /// Sets the pid file which will be used to write the pid of the container + /// process + /// # Example + /// + /// ```no_run + /// # use youki::container::builder::ContainerBuilder; + /// + /// 
ContainerBuilder::new("74f1a4cb3801".to_owned()) + /// .with_pid_file("/var/run/docker.pid"); + /// ``` pub fn with_pid_file>(mut self, path: P) -> Self { self.pid_file = Some(path.into()); self } + /// Sets the console socket, which will be used to send the file descriptor + /// of the pseudoterminal + /// # Example + /// + /// ```no_run + /// # use youki::container::builder::ContainerBuilder; + /// + /// ContainerBuilder::new("74f1a4cb3801".to_owned()) + /// .with_console_socket("/var/run/docker/sock.tty"); + /// ``` pub fn with_console_socket>(mut self, path: P) -> Self { self.console_socket = Some(path.into()); self diff --git a/src/container/init_builder.rs b/src/container/init_builder.rs index 363d5d867..26ddbfae6 100644 --- a/src/container/init_builder.rs +++ b/src/container/init_builder.rs @@ -13,6 +13,7 @@ use super::{ builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container, ContainerStatus, }; +// Builder that can be used to configure the properties of a new container pub struct InitContainerBuilder { base: ContainerBuilder, bundle: PathBuf, @@ -20,6 +21,8 @@ pub struct InitContainerBuilder { } impl InitContainerBuilder { + /// Generates the base configuration for a new container from which + /// configuration methods can be chained pub(super) fn new(builder: ContainerBuilder, bundle: PathBuf) -> Self { Self { base: builder, @@ -28,11 +31,13 @@ impl InitContainerBuilder { } } + /// Sets if systemd should be used for managing cgroups pub fn with_systemd(mut self, should_use: bool) -> Self { self.use_systemd = should_use; self } + /// Creates a new container pub fn build(self) -> Result<()> { let container_dir = self.create_container_dir()?; let spec = self.load_and_safeguard_spec(&container_dir)?; diff --git a/src/container/tenant_builder.rs b/src/container/tenant_builder.rs index fd48f8b40..386c3a6f0 100644 --- a/src/container/tenant_builder.rs +++ b/src/container/tenant_builder.rs @@ -10,6 +10,8 @@ use 
crate::{notify_socket::NotifyListener, rootless::detect_rootless, tty}; use super::{builder::ContainerBuilder, builder_impl::ContainerBuilderImpl}; +/// Builder that can be used to configure the properties of a process +/// that will join an existing container sandbox pub struct TenantContainerBuilder { base: ContainerBuilder, env: HashMap, @@ -18,6 +20,9 @@ pub struct TenantContainerBuilder { } impl TenantContainerBuilder { + /// Generates the base configuration for a process that will join + /// an existing container sandbox from which configuration methods + /// can be chained pub(super) fn new(builder: ContainerBuilder) -> Self { Self { base: builder, @@ -27,21 +32,25 @@ impl TenantContainerBuilder { } } + /// Sets environment variables for the container pub fn with_env(mut self, env: HashMap) -> Self { self.env = env; self } + /// Sets the working directory of the container pub fn with_cwd>(mut self, path: P) -> Self { self.cwd = Some(path.into()); self } + /// Sets the command the container will be started with pub fn with_container_command(mut self, command: Vec) -> Self { self.command = command; self } + /// Joins an existing container pub fn build(self) -> Result<()> { let container_dir = self.lookup_container_dir()?; let spec = self.load_init_spec(&container_dir)?; From 9be94d9475ca5322dc2c90c153163e0356d1e2ae Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 5 Jul 2021 19:50:34 +0200 Subject: [PATCH 45/70] Remove tests --- src/container/builder.rs | 75 ----------------------------------- src/container/builder_impl.rs | 2 +- 2 files changed, 1 insertion(+), 76 deletions(-) diff --git a/src/container/builder.rs b/src/container/builder.rs index 472a6e455..e9146e72c 100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -129,78 +129,3 @@ impl ContainerBuilder { } } -#[cfg(test)] -mod tests { - use std::collections::HashMap; - - use super::*; - use anyhow::Result; - - // required values (must be 
specified in new...) - // - create - // - id - // - bundle - // - exec - // - id - // - // use with_... methods to specify - // optional values - // - console-socket - // - pid-file - // - // overwritable values - // - systemd (default true) - // - root_path (default /run/youki) - // - // overwritable values (for exec only?) - // - env - // - cwd - // - container command - // - // calculated in build() - // computed values - // - rootless - // - container_dir - // - spec - // - notify_socket - // - container - - // create - fn test_create_init() -> Result<()> { - let id = "".to_owned(); - let bundle = PathBuf::from(""); - let pid_file = PathBuf::from(""); - let console_socket = PathBuf::from(""); - let root_path = PathBuf::from(""); - - let container = ContainerBuilder::new(id) - .with_pid_file(pid_file) // optional - .with_console_socket(console_socket) //optional - .with_root_path(root_path) // overwrite default - .as_init(bundle) - .with_systemd(false) - .build()?; - - Ok(()) - } - - // exec - fn test_create_tenant() -> Result<()> { - let id = "".to_owned(); - let pid_file = PathBuf::from(""); - let console_socket = PathBuf::from(""); - let cwd = PathBuf::from(""); - let env = HashMap::new(); - - let container = ContainerBuilder::new(id) - .with_pid_file(pid_file) - .with_console_socket(console_socket) - .as_tenant() - .with_cwd(cwd) - .with_env(env) - .with_container_command(vec!["sleep".to_owned(), "9001".to_owned()]) - .build()?; - - Ok(()) - } -} diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index b797ba8fa..4510cd70a 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -37,7 +37,7 @@ pub(super) struct ContainerBuilderImpl { } impl ContainerBuilderImpl { - pub fn create(&mut self) -> Result<()> { + pub(super) fn create(&mut self) -> Result<()> { if let Process::Parent(_) = self.run_container()? 
{ std::process::exit(0); } From 1643dd2e935babb4db520285fbb84619d4e6b4ae Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 5 Jul 2021 19:57:01 +0200 Subject: [PATCH 46/70] Renaming --- src/capabilities.rs | 4 ++-- src/command/linux.rs | 6 +++--- src/command/syscall.rs | 6 +++--- src/command/test.rs | 10 +++++----- src/container/builder.rs | 6 +++--- src/container/builder_impl.rs | 4 ++-- src/namespaces.rs | 6 +++--- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/capabilities.rs b/src/capabilities.rs index 773f6d578..3348dc0fc 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -44,11 +44,11 @@ pub fn drop_privileges(cs: &LinuxCapabilities, syscall: &impl Syscall) -> Result #[cfg(test)] mod tests { use super::*; - use crate::command::test::TestHelperCommand; + use crate::command::test::TestHelperSyscall; #[test] fn test_reset_effective() { - let test_command = TestHelperCommand::default(); + let test_command = TestHelperSyscall::default(); assert!(reset_effective(&test_command).is_ok()); let set_capability_args: Vec<_> = test_command .get_set_capability_args() diff --git a/src/command/linux.rs b/src/command/linux.rs index f34016b50..e2cb86eea 100644 --- a/src/command/linux.rs +++ b/src/command/linux.rs @@ -29,9 +29,9 @@ use crate::capabilities; /// Empty structure to implement Command trait for #[derive(Clone)] -pub struct LinuxCommand; +pub struct LinuxSyscall; -impl LinuxCommand { +impl LinuxSyscall { unsafe fn from_raw_buf<'a, T>(p: *const c_char) -> T where T: From<&'a OsStr>, @@ -46,7 +46,7 @@ impl LinuxCommand { } } -impl Syscall for LinuxCommand { +impl Syscall for LinuxSyscall { /// To enable dynamic typing, /// see https://doc.rust-lang.org/std/any/index.html for more information fn as_any(&self) -> &dyn Any { diff --git a/src/command/syscall.rs b/src/command/syscall.rs index 2ba95eae0..f3bba8727 100644 --- a/src/command/syscall.rs +++ b/src/command/syscall.rs @@ -12,7 +12,7 @@ use 
nix::{ use oci_spec::LinuxRlimit; -use crate::command::{linux::LinuxCommand, test::TestHelperCommand}; +use crate::command::{linux::LinuxSyscall, test::TestHelperSyscall}; /// This specifies various kernel/other functionalities required for /// container management @@ -30,8 +30,8 @@ pub trait Syscall { pub fn create_syscall() -> Box { if cfg!(test) { - Box::new(TestHelperCommand::default()) + Box::new(TestHelperSyscall::default()) } else { - Box::new(LinuxCommand) + Box::new(LinuxSyscall) } } diff --git a/src/command/test.rs b/src/command/test.rs index 01f1ac7dd..eaa7c3ac0 100644 --- a/src/command/test.rs +++ b/src/command/test.rs @@ -7,15 +7,15 @@ use oci_spec::LinuxRlimit; use super::Syscall; #[derive(Clone)] -pub struct TestHelperCommand { +pub struct TestHelperSyscall { set_ns_args: RefCell>, unshare_args: RefCell>, set_capability_args: RefCell>, } -impl Default for TestHelperCommand { +impl Default for TestHelperSyscall { fn default() -> Self { - TestHelperCommand { + TestHelperSyscall { set_ns_args: RefCell::new(vec![]), unshare_args: RefCell::new(vec![]), set_capability_args: RefCell::new(vec![]), @@ -23,7 +23,7 @@ impl Default for TestHelperCommand { } } -impl Syscall for TestHelperCommand { +impl Syscall for TestHelperSyscall { fn as_any(&self) -> &dyn Any { self } @@ -66,7 +66,7 @@ impl Syscall for TestHelperCommand { } } -impl TestHelperCommand { +impl TestHelperSyscall { pub fn get_setns_args(&self) -> Vec<(i32, CloneFlags)> { self.set_ns_args.borrow_mut().clone() } diff --git a/src/container/builder.rs b/src/container/builder.rs index e9146e72c..8730c5d11 100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -1,4 +1,4 @@ -use crate::command::linux::LinuxCommand; +use crate::command::linux::LinuxSyscall; use std::path::PathBuf; use super::{init_builder::InitContainerBuilder, tenant_builder::TenantContainerBuilder}; @@ -7,7 +7,7 @@ pub struct ContainerBuilder { pub(super) root_path: PathBuf, - pub(super) syscall: LinuxCommand, + 
pub(super) syscall: LinuxSyscall, pub(super) pid_file: Option, @@ -46,7 +46,7 @@ impl ContainerBuilder { Self { container_id, root_path, - syscall: LinuxCommand, + syscall: LinuxSyscall, pid_file: None, console_socket: None, } diff --git a/src/container/builder_impl.rs b/src/container/builder_impl.rs index 4510cd70a..cf85bddc9 100644 --- a/src/container/builder_impl.rs +++ b/src/container/builder_impl.rs @@ -9,7 +9,7 @@ use oci_spec::Spec; use crate::{ cgroups, - command::{linux::LinuxCommand, Syscall}, + command::{linux::LinuxSyscall, Syscall}, namespaces::Namespaces, notify_socket::NotifyListener, process::{fork, setup_init_process, Process}, @@ -22,7 +22,7 @@ use super::{Container, ContainerStatus}; pub(super) struct ContainerBuilderImpl { pub init: bool, - pub syscall: LinuxCommand, + pub syscall: LinuxSyscall, pub use_systemd: bool, pub container_id: String, pub root_path: PathBuf, diff --git a/src/namespaces.rs b/src/namespaces.rs index b6e81325e..1f2361652 100644 --- a/src/namespaces.rs +++ b/src/namespaces.rs @@ -81,7 +81,7 @@ mod tests { use oci_spec::LinuxNamespaceType; use super::*; - use crate::command::test::TestHelperCommand; + use crate::command::test::TestHelperSyscall; fn gen_sample_linux_namespaces() -> Vec { vec![ @@ -112,7 +112,7 @@ mod tests { fn test_namespaces_set_ns() { let sample_linux_namespaces = gen_sample_linux_namespaces(); let namespaces: Namespaces = sample_linux_namespaces.into(); - let test_command: &TestHelperCommand = namespaces.command.as_any().downcast_ref().unwrap(); + let test_command: &TestHelperSyscall = namespaces.command.as_any().downcast_ref().unwrap(); assert!(namespaces.apply_setns().is_ok()); let mut setns_args: Vec<_> = test_command @@ -132,7 +132,7 @@ mod tests { let namespaces: Namespaces = sample_linux_namespaces.into(); assert!(namespaces.apply_unshare(CloneFlags::CLONE_NEWIPC).is_ok()); - let test_command: &TestHelperCommand = namespaces.command.as_any().downcast_ref().unwrap(); + let test_command: 
&TestHelperSyscall = namespaces.command.as_any().downcast_ref().unwrap(); let mut unshare_args = test_command.get_unshare_args(); unshare_args.sort(); let mut expect = vec![CloneFlags::CLONE_NEWUSER | CloneFlags::CLONE_NEWPID]; From d2ff085b870a01e86eeeb1f7fad7bb99f9b43979 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 5 Jul 2021 22:32:14 +0200 Subject: [PATCH 47/70] Fix kill cmd test failures --- oci_spec/src/lib.rs | 11 +++++++---- src/container/init_builder.rs | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index b3a3324af..2a93d7de7 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -610,12 +610,15 @@ impl Spec { pub fn load>(path: P) -> Result { let path = path.as_ref(); let file = File::open(path).with_context(|| format!("failed to open {:?}", path))?; - let mut spec: Spec = serde_json::from_reader(&file)?; - // FIME: It is fail if the caller isn't in the correct directory. 
- spec.root.path = std::fs::canonicalize(&spec.root.path) - .with_context(|| format!("failed to canonicalize {:?}", spec.root.path))?; + let spec: Spec = serde_json::from_reader(&file)?; Ok(spec) } + + pub fn canonicalize_rootfs(&mut self) -> Result<()> { + self.root.path = std::fs::canonicalize(&self.root.path) + .with_context(|| format!("failed to canonicalize {:?}", self.root.path))?; + Ok(()) + } } #[cfg(feature = "proptests")] diff --git a/src/container/init_builder.rs b/src/container/init_builder.rs index 26ddbfae6..4adbf6967 100644 --- a/src/container/init_builder.rs +++ b/src/container/init_builder.rs @@ -101,7 +101,9 @@ impl InitContainerBuilder { ) })?; - let spec = oci_spec::Spec::load(&target_spec_path)?; + let mut spec = oci_spec::Spec::load(&target_spec_path)?; + unistd::chdir(&self.bundle)?; + spec.canonicalize_rootfs()?; Ok(spec) } From 0eaa90655f251b9e042147f6b0742062d39dcfdd Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Mon, 5 Jul 2021 22:42:58 +0200 Subject: [PATCH 48/70] Execute doc tests --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 34ddb608e..567c1e2bb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,6 +31,8 @@ jobs: run: ./build.sh - name: Run tests run: cargo test + - name: Run doc tests + run: cargo test --doc integration_tests: runs-on: ubuntu-latest steps: From e76360b3783b590b192aafdc09c2999ac14c3f0a Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 6 Jul 2021 21:41:51 +0200 Subject: [PATCH 49/70] Review feedback and fmt --- src/capabilities.rs | 2 +- src/cgroups/v1/pids.rs | 7 ++----- src/cgroups/v1/util.rs | 2 +- src/cgroups/v2/systemd_manager.rs | 14 ++++++++------ src/command/mod.rs | 2 +- src/container/builder.rs | 19 +++++++++---------- src/container/init_builder.rs | 2 +- src/process/fork.rs | 14 +++++++------- 
src/process/init.rs | 9 +++++++-- src/process/mod.rs | 2 +- 10 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/capabilities.rs b/src/capabilities.rs index 3348dc0fc..79088fdf3 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -1,4 +1,4 @@ -use crate::command::{Syscall}; +use crate::command::Syscall; use caps::*; use anyhow::Result; diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index f09db1b45..025bed7d9 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -1,4 +1,4 @@ -use std::path::{Path}; +use std::path::Path; use anyhow::Result; @@ -10,10 +10,7 @@ pub struct Pids {} impl Controller for Pids { type Resource = LinuxPids; - fn apply( - linux_resources: &LinuxResources, - cgroup_root: &Path, - ) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply pids cgroup config"); if let Some(pids) = &linux_resources.pids { diff --git a/src/cgroups/v1/util.rs b/src/cgroups/v1/util.rs index dd070d6b2..389bbddba 100644 --- a/src/cgroups/v1/util.rs +++ b/src/cgroups/v1/util.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, path::PathBuf}; use anyhow::{anyhow, Result}; use procfs::process::Process; -use super::{ControllerType, controller_type::CONTROLLERS}; +use super::{controller_type::CONTROLLERS, ControllerType}; pub fn list_subsystem_mount_points() -> Result> { let mut mount_paths = HashMap::with_capacity(CONTROLLERS.len()); diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs index 375fc1a7e..ff37c06eb 100644 --- a/src/cgroups/v2/systemd_manager.rs +++ b/src/cgroups/v2/systemd_manager.rs @@ -261,9 +261,10 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> { - let cgroups_path = - SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("test-a-b.slice:docker:foo")) - .expect(""); + let cgroups_path = SystemDCGroupManager::destructure_cgroups_path(PathBuf::from( + 
"test-a-b.slice:docker:foo", + )) + .expect(""); assert_eq!( SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, @@ -275,9 +276,10 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> { - let cgroups_path = - SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("machine.slice:libpod:foo")) - .expect(""); + let cgroups_path = SystemDCGroupManager::destructure_cgroups_path(PathBuf::from( + "machine.slice:libpod:foo", + )) + .expect(""); assert_eq!( SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, diff --git a/src/command/mod.rs b/src/command/mod.rs index a8bcd927f..543997e8e 100644 --- a/src/command/mod.rs +++ b/src/command/mod.rs @@ -2,9 +2,9 @@ //! This provides a uniform interface for rest of Youki //! to call syscalls required for container management +pub mod linux; #[allow(clippy::module_inception)] pub mod syscall; -pub mod linux; pub mod test; pub use syscall::Syscall; diff --git a/src/container/builder.rs b/src/container/builder.rs index 8730c5d11..cfaf6dc98 100644 --- a/src/container/builder.rs +++ b/src/container/builder.rs @@ -14,9 +14,9 @@ pub struct ContainerBuilder { pub(super) console_socket: Option, } -/// Builder that can be used to configure the common properties of -/// either a init or a tenant container -/// +/// Builder that can be used to configure the common properties of +/// either a init or a tenant container +/// /// # Example /// /// ```no_run @@ -54,7 +54,7 @@ impl ContainerBuilder { /// Transforms this builder into a tenant builder /// # Example - /// + /// /// ```no_run /// # use youki::container::builder::ContainerBuilder; /// @@ -70,7 +70,7 @@ impl ContainerBuilder { /// Transforms this builder into an init builder /// # Example - /// + /// /// ```no_run /// # use youki::container::builder::ContainerBuilder; /// @@ -86,7 +86,7 @@ impl ContainerBuilder { /// Sets the root path which will be used to store the container state /// # Example - /// + /// /// ```no_run /// # use 
youki::container::builder::ContainerBuilder; /// @@ -101,7 +101,7 @@ impl ContainerBuilder { /// Sets the pid file which will be used to write the pid of the container /// process /// # Example - /// + /// /// ```no_run /// # use youki::container::builder::ContainerBuilder; /// @@ -113,10 +113,10 @@ impl ContainerBuilder { self } - /// Sets the console socket, which will be used to send the file descriptor + /// Sets the console socket, which will be used to send the file descriptor /// of the pseudoterminal /// # Example - /// + /// /// ```no_run /// # use youki::container::builder::ContainerBuilder; /// @@ -128,4 +128,3 @@ impl ContainerBuilder { self } } - diff --git a/src/container/init_builder.rs b/src/container/init_builder.rs index 4adbf6967..b6789f21e 100644 --- a/src/container/init_builder.rs +++ b/src/container/init_builder.rs @@ -21,7 +21,7 @@ pub struct InitContainerBuilder { } impl InitContainerBuilder { - /// Generates the base configuration for a new container from which + /// Generates the base configuration for a new container from which /// configuration methods can be chained pub(super) fn new(builder: ContainerBuilder, bundle: PathBuf) -> Self { Self { diff --git a/src/process/fork.rs b/src/process/fork.rs index ca2a4a67c..915f802cd 100644 --- a/src/process/fork.rs +++ b/src/process/fork.rs @@ -74,14 +74,14 @@ pub fn fork_first>( } if let Some(container) = container { - // update status and pid of the container process - container - .update_status(ContainerStatus::Created) - .set_creator(nix::unistd::geteuid().as_raw()) - .set_pid(init_pid) - .save()?; + // update status and pid of the container process + container + .update_status(ContainerStatus::Created) + .set_creator(nix::unistd::geteuid().as_raw()) + .set_pid(init_pid) + .save()?; } - + // if file to write the pid to is specified, write pid of the child if let Some(pid_file) = pid_file { fs::write(&pid_file, format!("{}", child))?; diff --git a/src/process/init.rs b/src/process/init.rs 
index 64f61f529..4615cc4c1 100644 --- a/src/process/init.rs +++ b/src/process/init.rs @@ -2,9 +2,14 @@ use std::{io::Write, path::PathBuf}; use anyhow::Result; use mio::unix::pipe::Sender; -use nix::{sched, unistd::{Gid, Uid}}; +use nix::{ + sched, + unistd::{Gid, Uid}, +}; -use crate::{capabilities, command::Syscall, namespaces::Namespaces, process::message::Message, rootfs}; +use crate::{ + capabilities, command::Syscall, namespaces::Namespaces, process::message::Message, rootfs, +}; /// Contains sending end for pipe for the child process pub struct InitProcess { diff --git a/src/process/mod.rs b/src/process/mod.rs index 16c59a55f..65b1b907a 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -10,7 +10,7 @@ mod child; mod init; mod parent; -pub use init::{InitProcess, setup_init_process}; +pub use init::{setup_init_process, InitProcess}; /// Used to describe type of process after fork. /// Parent and child processes mean the same thing as in a normal fork call From 3dd98c5a98e243c405a9ccf35010ef02302027a5 Mon Sep 17 00:00:00 2001 From: Furisto <24721048+Furisto@users.noreply.github.com> Date: Tue, 6 Jul 2021 18:06:16 +0200 Subject: [PATCH 50/70] Reduce binary size --- Cargo.lock | 32 -------------------------------- Cargo.toml | 10 ++++++++-- src/cgroups/v1/hugetlb.rs | 28 ++++++++++++++-------------- 3 files changed, 22 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad0c867f6..8d338e067 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,17 +23,6 @@ version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b2cd92db5cbd74e8e5028f7e27dd7aa3090e89e4f2a197cc7c8dfb69c7063b" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.0.1" @@ -107,14 +96,12 @@ version 
= "3.0.0-beta.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bd1061998a501ee7d4b6d449020df3266ca3124b941ec56cf2005c3779ca142" dependencies = [ - "atty", "bitflags", "clap_derive", "indexmap", "lazy_static", "os_str_bytes", "strsim", - "termcolor", "textwrap", "unicode-width", "vec_map", @@ -878,15 +865,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "termcolor" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" -dependencies = [ - "winapi-util", -] - [[package]] name = "textwrap" version = "0.12.1" @@ -984,15 +962,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -1018,7 +987,6 @@ dependencies = [ "prctl", "procfs", "quickcheck", - "regex", "serde", "serde_json", "serial_test", diff --git a/Cargo.toml b/Cargo.toml index d54821a2c..ee1892a3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,8 +5,12 @@ authors = ["utam0k "] edition = "2018" description = "A container runtime written in Rust" +[dependencies.clap] +version = "3.0.0-beta.2" +default-features = false +features = ["std", "suggestions", "derive"] + [dependencies] -clap = "3.0.0-beta.2" nix = "0.19.1" procfs = "0.9.1" caps = "0.5.1" @@ -20,7 +24,6 @@ mio = { version = "0.7", features = ["os-ext", "os-poll"] } chrono = { version="0.4", features = ["serde"] } once_cell = "1.6.0" futures = { version = "0.3", features = ["thread-pool"] } -regex = "1.5" oci_spec = { version = "0.1.0", path = "./oci_spec" } systemd = { version = "0.8", 
default-features = false } dbus = "0.9.2" @@ -30,3 +33,6 @@ tabwriter = "1" oci_spec = { version = "0.1.0", path = "./oci_spec", features = ["proptests"] } quickcheck = "1" serial_test = "0.5.1" + +[profile.release] +lto = true \ No newline at end of file diff --git a/src/cgroups/v1/hugetlb.rs b/src/cgroups/v1/hugetlb.rs index 7b4b2b66a..3e3551285 100644 --- a/src/cgroups/v1/hugetlb.rs +++ b/src/cgroups/v1/hugetlb.rs @@ -1,7 +1,6 @@ use std::path::Path; use anyhow::{bail, Result}; -use regex::Regex; use crate::cgroups::{common, v1::Controller}; use oci_spec::{LinuxHugepageLimit, LinuxResources}; @@ -34,16 +33,14 @@ impl Controller for Hugetlb { impl Hugetlb { fn apply(root_path: &Path, hugetlb: &LinuxHugepageLimit) -> Result<()> { - let re = Regex::new(r"(?P[0-9]+)[KMG]B")?; - let caps = re.captures(&hugetlb.page_size); - match caps { - None => bail!("page size must be in the format [0-9]+[KMG]B"), - Some(caps) => { - let page_size: u64 = caps["pagesize"].parse()?; - if !Self::is_power_of_two(page_size) { - bail!("page size must be in the format of 2^(integer)"); - } - } + let page_size: String = hugetlb + .page_size + .chars() + .take_while(|c| c.is_digit(10)) + .collect(); + let page_size: u64 = page_size.parse()?; + if !Self::is_power_of_two(page_size) { + bail!("page size must be in the format of 2^(integer)"); } common::write_cgroup_file( @@ -106,10 +103,13 @@ mod tests { let result = Hugetlb::apply(&tmp, &hugetlb); - let re = Regex::new(r"(?P[0-9]+)[KMG]B").expect("create regex for parsing pagesize"); - let caps = re.captures(&hugetlb.page_size).expect("should capture pagesize"); + let page_size: String = hugetlb + .page_size + .chars() + .take_while(|c| c.is_digit(10)) + .collect(); + let page_size: u64 = page_size.parse().expect("parse page size"); - let page_size: u64 = caps["pagesize"].parse().expect("should contain captured pagesize"); if Hugetlb::is_power_of_two(page_size) && page_size != 1 { let content = 
read_to_string(tmp.join(page_file_name)).expect("Read hugetlb file content"); From d16136f9bff4629042f3abb9ecba42231494d2d6 Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Wed, 23 Jun 2021 15:15:01 +0800 Subject: [PATCH 51/70] Add cgroup v2 freezer controller --- oci_spec/src/lib.rs | 2 +- src/cgroups/v1/pids.rs | 7 +- src/cgroups/v1/util.rs | 2 +- src/cgroups/v2/controller_type.rs | 2 + src/cgroups/v2/freezer.rs | 193 ++++++++++++++++++++++++++++++ src/cgroups/v2/manager.rs | 8 +- src/cgroups/v2/mod.rs | 1 + src/cgroups/v2/systemd_manager.rs | 20 ++-- 8 files changed, 220 insertions(+), 15 deletions(-) create mode 100644 src/cgroups/v2/freezer.rs diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 2836fab83..079a527cf 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -557,7 +557,7 @@ pub enum LinuxSeccompOperator { ScmpCmpMaskedEq = 7, } -#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq)] pub enum FreezerState { Undefined, Frozen, diff --git a/src/cgroups/v1/pids.rs b/src/cgroups/v1/pids.rs index f09db1b45..025bed7d9 100644 --- a/src/cgroups/v1/pids.rs +++ b/src/cgroups/v1/pids.rs @@ -1,4 +1,4 @@ -use std::path::{Path}; +use std::path::Path; use anyhow::Result; @@ -10,10 +10,7 @@ pub struct Pids {} impl Controller for Pids { type Resource = LinuxPids; - fn apply( - linux_resources: &LinuxResources, - cgroup_root: &Path, - ) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &Path) -> Result<()> { log::debug!("Apply pids cgroup config"); if let Some(pids) = &linux_resources.pids { diff --git a/src/cgroups/v1/util.rs b/src/cgroups/v1/util.rs index dd070d6b2..389bbddba 100644 --- a/src/cgroups/v1/util.rs +++ b/src/cgroups/v1/util.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, path::PathBuf}; use anyhow::{anyhow, Result}; use procfs::process::Process; -use super::{ControllerType, controller_type::CONTROLLERS}; +use 
super::{controller_type::CONTROLLERS, ControllerType}; pub fn list_subsystem_mount_points() -> Result> { let mut mount_paths = HashMap::with_capacity(CONTROLLERS.len()); diff --git a/src/cgroups/v2/controller_type.rs b/src/cgroups/v2/controller_type.rs index 016a4064f..c5cfb7201 100644 --- a/src/cgroups/v2/controller_type.rs +++ b/src/cgroups/v2/controller_type.rs @@ -5,6 +5,7 @@ pub enum ControllerType { Memory, HugeTlb, Pids, + Freezer, } impl ToString for ControllerType { @@ -16,6 +17,7 @@ impl ToString for ControllerType { Self::Memory => "memory".into(), Self::HugeTlb => "hugetlb".into(), Self::Pids => "pids".into(), + Self::Freezer => "freezer".into(), } } } diff --git a/src/cgroups/v2/freezer.rs b/src/cgroups/v2/freezer.rs new file mode 100644 index 000000000..ef0643909 --- /dev/null +++ b/src/cgroups/v2/freezer.rs @@ -0,0 +1,193 @@ +use anyhow::{bail, Result}; +use std::{ + fs::OpenOptions, + io::{BufRead, BufReader, Read, Seek, SeekFrom, Write}, + path::Path, + str, thread, + time::Duration, +}; + +use oci_spec::{FreezerState, LinuxResources}; + +use super::controller::Controller; + +const CGROUP_FREEZE: &str = "cgroup.freeze"; +const CGROUP_EVENTS: &str = "cgroup.events"; + +pub struct Freezer {} + +impl Controller for Freezer { + fn apply(linux_resources: &LinuxResources, cgroup_path: &Path) -> Result<()> { + if let Some(freezer_state) = linux_resources.freezer { + Self::apply(freezer_state, cgroup_path)?; + } + + Ok(()) + } +} + +impl Freezer { + fn apply(freezer_state: FreezerState, path: &Path) -> Result<()> { + let state_str = match freezer_state { + FreezerState::Undefined => return Ok(()), + FreezerState::Frozen => "1", + FreezerState::Thawed => "0", + }; + + match OpenOptions::new() + .create(false) + .write(true) + .open(path.join(CGROUP_FREEZE)) + { + Err(e) => { + if let FreezerState::Frozen = freezer_state { + bail!("freezer not supported {}", e); + } + return Ok(()); + } + Ok(mut file) => file.write_all(state_str.as_bytes())?, + }; + + // 
confirm that the cgroup did actually change states. + let actual_state = Self::read_freezer_state(path)?; + if !actual_state.eq(&freezer_state) { + bail!( + "expected \"cgroup.freeze\" to be in state {:?} but was in {:?}", + freezer_state, + actual_state + ); + } + + Ok(()) + } + + fn read_freezer_state(path: &Path) -> Result { + let mut buf = [0; 1]; + OpenOptions::new() + .create(false) + .read(true) + .open(path.join(CGROUP_FREEZE))? + .read_exact(&mut buf)?; + + let state = str::from_utf8(&buf)?; + match state { + "0" => Ok(FreezerState::Thawed), + "1" => Self::wait_frozen(path), + _ => bail!("unknown \"cgroup.freeze\" state: {}", state), + } + } + + // wait_frozen polls cgroup.events until it sees "frozen 1" in it. + fn wait_frozen(path: &Path) -> Result { + let f = OpenOptions::new() + .create(false) + .read(true) + .open(path.join(CGROUP_EVENTS))?; + let mut f = BufReader::new(f); + + let wait_time = Duration::from_millis(10); + let max_iter = 1000; + let mut iter = 0; + let mut line = String::new(); + + loop { + if iter == max_iter { + bail!( + "timeout of {} ms reached waiting for the cgroup to freeze", + wait_time.as_millis() * max_iter + ); + } + line.clear(); + let num_bytes = f.read_line(&mut line)?; + if num_bytes == 0 { + break; + } + if line.starts_with("frozen ") { + if line.starts_with("frozen 1") { + if iter > 1 { + log::debug!("frozen after {} retries", iter) + } + return Ok(FreezerState::Frozen); + } + iter += 1; + thread::sleep(wait_time); + f.seek(SeekFrom::Start(0))?; + } + } + + Ok(FreezerState::Undefined) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; + use oci_spec::FreezerState; + use std::sync::Arc; + + #[test] + fn test_set_freezer_state() { + let tmp = Arc::new( + create_temp_dir("test_set_freezer_state").expect("create temp directory for test"), + ); + set_fixture(&tmp, CGROUP_FREEZE, "").expect("Set fixure for freezer state"); + set_fixture(&tmp, 
CGROUP_EVENTS, "populated 0\nfrozen 0") + .expect("Set fixure for freezer state"); + + // set Frozen state. + { + // use another thread to update events file async. + let p = Arc::clone(&tmp); + thread::spawn(move || { + thread::sleep(Duration::from_millis(100)); + set_fixture(&p, CGROUP_EVENTS, "populated 0\nfrozen 1") + .expect("Set fixure for freezer state"); + }); + let freezer_state = FreezerState::Frozen; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZE)).expect("Read to string"); + assert_eq!("1", state_content); + } + + // set Thawed state. + { + let freezer_state = FreezerState::Thawed; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZE)).expect("Read to string"); + assert_eq!("0", state_content); + } + + // set Undefined state. + { + let old_state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZE)).expect("Read to string"); + let freezer_state = FreezerState::Undefined; + Freezer::apply(freezer_state, &tmp).expect("Set freezer state"); + + let state_content = + std::fs::read_to_string(tmp.join(CGROUP_FREEZE)).expect("Read to string"); + assert_eq!(old_state_content, state_content); + } + } + + #[test] + fn test_set_freezer_state_error() { + let tmp = + create_temp_dir("test_set_freezer_state").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_FREEZE, "").expect("Set fixure for freezer state"); + set_fixture(&tmp, CGROUP_EVENTS, "").expect("Set fixure for freezer state"); + + // events file does not contain "frozen 1" + { + let freezer_state = FreezerState::Frozen; + let r = Freezer::apply(freezer_state, &tmp); + assert!(r.is_err()); + } + } +} diff --git a/src/cgroups/v2/manager.rs b/src/cgroups/v2/manager.rs index 5a605e294..32b17779a 100644 --- a/src/cgroups/v2/manager.rs +++ b/src/cgroups/v2/manager.rs @@ -9,7 +9,10 @@ use anyhow::{bail, Result}; 
use nix::unistd::Pid; use oci_spec::LinuxResources; -use super::{cpu::Cpu, cpuset::CpuSet, hugetlb::HugeTlb, io::Io, memory::Memory, pids::Pids}; +use super::{ + cpu::Cpu, cpuset::CpuSet, freezer::Freezer, hugetlb::HugeTlb, io::Io, memory::Memory, + pids::Pids, +}; use crate::{ cgroups::v2::controller::Controller, cgroups::{ @@ -29,6 +32,7 @@ const CONTROLLER_TYPES: &[ControllerType] = &[ ControllerType::Io, ControllerType::Memory, ControllerType::Pids, + ControllerType::Freezer, ]; pub struct Manager { @@ -97,6 +101,7 @@ impl Manager { "io" => controllers.push(ControllerType::Io), "memory" => controllers.push(ControllerType::Memory), "pids" => controllers.push(ControllerType::Pids), + "freezer" => controllers.push(ControllerType::Freezer), tpe => log::warn!("Controller {} is not yet implemented.", tpe), } } @@ -128,6 +133,7 @@ impl CgroupManager for Manager { ControllerType::Io => Io::apply(linux_resources, &self.full_path)?, ControllerType::Memory => Memory::apply(linux_resources, &self.full_path)?, ControllerType::Pids => Pids::apply(linux_resources, &self.full_path)?, + ControllerType::Freezer => Freezer::apply(linux_resources, &self.full_path)?, } } diff --git a/src/cgroups/v2/mod.rs b/src/cgroups/v2/mod.rs index a3c10f481..f86f1b8a0 100644 --- a/src/cgroups/v2/mod.rs +++ b/src/cgroups/v2/mod.rs @@ -2,6 +2,7 @@ mod controller; mod controller_type; mod cpu; mod cpuset; +mod freezer; mod hugetlb; mod io; pub mod manager; diff --git a/src/cgroups/v2/systemd_manager.rs b/src/cgroups/v2/systemd_manager.rs index 375fc1a7e..874a5b99d 100644 --- a/src/cgroups/v2/systemd_manager.rs +++ b/src/cgroups/v2/systemd_manager.rs @@ -8,7 +8,10 @@ use nix::unistd::Pid; use oci_spec::LinuxResources; use std::path::{Path, PathBuf}; -use super::{cpu::Cpu, cpuset::CpuSet, hugetlb::HugeTlb, io::Io, memory::Memory, pids::Pids}; +use super::{ + cpu::Cpu, cpuset::CpuSet, freezer::Freezer, hugetlb::HugeTlb, io::Io, memory::Memory, + pids::Pids, +}; use crate::cgroups::common; use 
crate::cgroups::common::CgroupManager; use crate::cgroups::v2::controller::Controller; @@ -234,6 +237,7 @@ impl CgroupManager for SystemDCGroupManager { ControllerType::Io => Io::apply(linux_resources, &self.full_path)?, ControllerType::Memory => Memory::apply(linux_resources, &self.full_path)?, ControllerType::Pids => Pids::apply(linux_resources, &self.full_path)?, + ControllerType::Freezer => Freezer::apply(linux_resources, &self.full_path)?, } } @@ -261,9 +265,10 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_complex_slice() -> Result<()> { - let cgroups_path = - SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("test-a-b.slice:docker:foo")) - .expect(""); + let cgroups_path = SystemDCGroupManager::destructure_cgroups_path(PathBuf::from( + "test-a-b.slice:docker:foo", + )) + .expect(""); assert_eq!( SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, @@ -275,9 +280,10 @@ mod tests { #[test] fn get_cgroups_path_works_with_a_simple_slice() -> Result<()> { - let cgroups_path = - SystemDCGroupManager::destructure_cgroups_path(PathBuf::from("machine.slice:libpod:foo")) - .expect(""); + let cgroups_path = SystemDCGroupManager::destructure_cgroups_path(PathBuf::from( + "machine.slice:libpod:foo", + )) + .expect(""); assert_eq!( SystemDCGroupManager::construct_cgroups_path(cgroups_path)?, From 7e02cb86f082633e476a8b7b75dd1a0387ab619d Mon Sep 17 00:00:00 2001 From: utam0k Date: Sat, 10 Jul 2021 15:48:50 +0900 Subject: [PATCH 52/70] fix the warnings shown by cargo clippy --- src/cgroups/v1/memory.rs | 2 +- src/cgroups/v2/pids.rs | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/cgroups/v1/memory.rs b/src/cgroups/v1/memory.rs index 760f1d565..641fd0eea 100644 --- a/src/cgroups/v1/memory.rs +++ b/src/cgroups/v1/memory.rs @@ -343,7 +343,7 @@ mod tests { } quickcheck! 
{ - fn property_test_set_memory(linux_memory: LinuxMemory, disable_oom_killer: bool, pid_int: i32) -> bool { + fn property_test_set_memory(linux_memory: LinuxMemory, disable_oom_killer: bool) -> bool { let tmp = create_temp_dir("property_test_set_memory").expect("create temp directory for test"); set_fixture(&tmp, CGROUP_MEMORY_USAGE, "0").expect("Set fixure for memory usage"); diff --git a/src/cgroups/v2/pids.rs b/src/cgroups/v2/pids.rs index 27a0bb262..9c7faf171 100644 --- a/src/cgroups/v2/pids.rs +++ b/src/cgroups/v2/pids.rs @@ -26,10 +26,7 @@ impl Pids { } else { "max".to_string() }; - Ok(common::write_cgroup_file( - &root_path.join("pids.max"), - &limit, - )?) + common::write_cgroup_file(&root_path.join("pids.max"), &limit) } } From aadfe59d22d3f8ad0ce96aeedcc4723c6a0fd924 Mon Sep 17 00:00:00 2001 From: song Date: Wed, 7 Jul 2021 19:03:26 +0800 Subject: [PATCH 53/70] support cgroupv2 io controller --- src/cgroups/v2/io.rs | 231 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 228 insertions(+), 3 deletions(-) diff --git a/src/cgroups/v2/io.rs b/src/cgroups/v2/io.rs index ebd3ab959..67ef510ab 100644 --- a/src/cgroups/v2/io.rs +++ b/src/cgroups/v2/io.rs @@ -1,12 +1,237 @@ -use anyhow::Result; +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Result}; + +use crate::cgroups::common; use super::controller::Controller; -use oci_spec::LinuxResources; +use oci_spec::{LinuxBlockIo, LinuxResources}; + +const CGROUP_BFQ_IO_WEIGHT: &str = "io.bfq.weight"; +const CGROUP_IO_WEIGHT: &str = "io.weight"; pub struct Io {} impl Controller for Io { - fn apply(_: &LinuxResources, _: &std::path::Path) -> Result<()> { + fn apply(linux_resource: &LinuxResources, cgroup_root: &Path) -> Result<()> { + log::debug!("Apply io cgrup v2 config"); + if let Some(io) = &linux_resource.block_io { + Self::apply(cgroup_root, io)?; + } + Ok(()) + } +} + +impl Io { + fn io_max_path(path: &Path) -> PathBuf { + path.join("io.max") + } + + // linux kernel doc: 
https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#io + fn apply(root_path: &Path, blkio: &LinuxBlockIo) -> Result<()> { + for wd in &blkio.blkio_weight_device { + common::write_cgroup_file( + root_path.join(CGROUP_BFQ_IO_WEIGHT), + &format!("{}:{} {}", wd.major, wd.minor, wd.weight.unwrap()), + )?; + } + if let Some(leaf_weight) = blkio.blkio_leaf_weight { + if leaf_weight > 0 { + bail!("cannot set leaf_weight with cgroupv2"); + } + } + if let Some(io_weight) = blkio.blkio_weight { + if io_weight > 0 { + common::write_cgroup_file( + root_path.join(CGROUP_IO_WEIGHT), + format!("{}", io_weight), + )?; + } + } + + for trbd in &blkio.blkio_throttle_read_bps_device { + common::write_cgroup_file( + Self::io_max_path(root_path), + &format!("{}:{} rbps={}", trbd.major, trbd.minor, trbd.rate), + )?; + } + + for twbd in &blkio.blkio_throttle_write_bps_device { + common::write_cgroup_file( + Self::io_max_path(root_path), + format!("{}:{} wbps={}", twbd.major, twbd.minor, twbd.rate), + )?; + } + for trid in &blkio.blkio_throttle_read_iops_device { + common::write_cgroup_file( + Self::io_max_path(root_path), + format!("{}:{} riops={}", trid.major, trid.minor, trid.rate), + )?; + } + for twid in &blkio.blkio_throttle_write_iops_device { + common::write_cgroup_file( + Self::io_max_path(root_path), + format!("{}:{} wiops={}", twid.major, twid.minor, twid.rate), + )?; + } Ok(()) } } +#[cfg(test)] +mod test { + use super::*; + use crate::cgroups::test::setup; + use oci_spec::{LinuxBlockIo, LinuxThrottleDevice, LinuxWeightDevice}; + use std::fs; + struct BlockIoBuilder { + block_io: LinuxBlockIo, + } + impl BlockIoBuilder { + fn new() -> Self { + let block_io = LinuxBlockIo { + blkio_weight: Some(0), + blkio_leaf_weight: Some(0), + blkio_weight_device: vec![], + blkio_throttle_read_bps_device: vec![], + blkio_throttle_write_bps_device: vec![], + blkio_throttle_read_iops_device: vec![], + blkio_throttle_write_iops_device: vec![], + }; + + Self { block_io } + } + fn 
with_write_weight_device(mut self, throttle: Vec) -> Self { + self.block_io.blkio_weight_device = throttle; + self + } + fn with_write_io_weight(mut self, iow: u16) -> Self { + self.block_io.blkio_weight = Some(iow); + self + } + + fn with_read_bps(mut self, throttle: Vec) -> Self { + self.block_io.blkio_throttle_read_bps_device = throttle; + self + } + + fn with_write_bps(mut self, throttle: Vec) -> Self { + self.block_io.blkio_throttle_write_bps_device = throttle; + self + } + + fn with_read_iops(mut self, throttle: Vec) -> Self { + self.block_io.blkio_throttle_read_iops_device = throttle; + self + } + + fn with_write_iops(mut self, throttle: Vec) -> Self { + self.block_io.blkio_throttle_write_iops_device = throttle; + self + } + + fn build(self) -> LinuxBlockIo { + self.block_io + } + } + + #[test] + fn test_set_io_read_bps() { + let (tmp, throttle) = setup("test_set_io_read_bps", "io.max"); + + let blkio = BlockIoBuilder::new() + .with_read_bps(vec![LinuxThrottleDevice { + major: 8, + minor: 0, + rate: 102400, + }]) + .build(); + + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content = fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read rbps content")); + + assert_eq!("8:0 rbps=102400", content); + } + + #[test] + fn test_set_io_write_bps() { + let (tmp, throttle) = setup("test_set_io_write_bps", "io.max"); + + let blkio = BlockIoBuilder::new() + .with_write_bps(vec![LinuxThrottleDevice { + major: 8, + minor: 0, + rate: 102400, + }]) + .build(); + + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content = fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read rbps content")); + + assert_eq!("8:0 wbps=102400", content); + } + + #[test] + fn test_set_io_read_iops() { + let (tmp, throttle) = setup("test_set_io_read_iops", "io.max"); + + let blkio = BlockIoBuilder::new() + .with_read_iops(vec![LinuxThrottleDevice { + major: 8, + minor: 0, + rate: 102400, + }]) + .build(); + + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content 
= fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read riops content")); + + assert_eq!("8:0 riops=102400", content); + } + + #[test] + fn test_set_io_write_iops() { + let (tmp, throttle) = setup("test_set_io_write_iops", "io.max"); + + let blkio = BlockIoBuilder::new() + .with_write_iops(vec![LinuxThrottleDevice { + major: 8, + minor: 0, + rate: 102400, + }]) + .build(); + + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content = fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read wiops content")); + + assert_eq!("8:0 wiops=102400", content); + } + + #[test] + fn test_set_ioweight_device() { + let (tmp, throttle) = setup("test_set_io_weight_device", CGROUP_BFQ_IO_WEIGHT); + let blkio = BlockIoBuilder::new() + .with_write_weight_device(vec![LinuxWeightDevice { + major: 8, + minor: 0, + weight: Some(80), + leaf_weight: Some(0), + }]) + .build(); + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content = + fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read bfq_io_weight content")); + + assert_eq!("8:0 80", content); + } + + #[test] + fn test_set_ioweight() { + let (tmp, throttle) = setup("test_set_io_weight", CGROUP_IO_WEIGHT); + let blkio = BlockIoBuilder::new().with_write_io_weight(100).build(); + Io::apply(&tmp, &blkio).expect("apply blkio"); + let content = + fs::read_to_string(throttle).unwrap_or_else(|_| panic!("read bfq_io_weight content")); + + assert_eq!("100", content); + } +} From 599676e8b62e805c0b2d41d487dacd1108647f18 Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Sun, 11 Jul 2021 15:20:53 +0800 Subject: [PATCH 54/70] Add code format check in CI --- .github/workflows/main.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 567c1e2bb..a4c40fe7f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -7,6 +7,22 @@ on: - main jobs: + rustfmt: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: 
actions/cache@v2 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - run: rustup component add rustfmt + - name: Check formatting + run: cargo fmt --all -- --check tests: runs-on: ubuntu-latest steps: From f8b08c08f0dd3f0b8e03007858003dbd2a5ca59e Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Sun, 11 Jul 2021 15:38:45 +0800 Subject: [PATCH 55/70] format code to pass CI check --- oci_spec/src/lib.rs | 4 ++-- src/main.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 148301efe..88b782283 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::fs::File; use std::path::{Path, PathBuf}; -use anyhow::{Context, Result, bail}; +use anyhow::{bail, Context, Result}; use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone)] @@ -616,7 +616,7 @@ impl Spec { pub fn canonicalize_rootfs(&mut self) -> Result<()> { self.root.path = std::fs::canonicalize(&self.root.path) - .with_context(|| format!("failed to canonicalize {:?}", self.root.path))?; + .with_context(|| format!("failed to canonicalize {:?}", self.root.path))?; Ok(()) } } diff --git a/src/main.rs b/src/main.rs index df57eecfb..c3eebf2dd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,7 +17,6 @@ use youki::rootless::should_use_rootless; use youki::start; use youki::state; - /// High-level commandline option definition /// This takes global options as well as individual commands as specified in [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md) /// Also check [runc commandline documentation](https://github.com/opencontainers/runc/blob/master/man/runc.8.md) for more explanation From 92a5a11667c33f26cf93914bf808b0d37c6667f8 Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Sun, 11 Jul 2021 19:22:52 +0800 
Subject: [PATCH 56/70] Fix spec path in delete --- oci_spec/src/lib.rs | 3 ++- src/delete.rs | 7 +------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/oci_spec/src/lib.rs b/oci_spec/src/lib.rs index 88b782283..c3a9d68e5 100644 --- a/oci_spec/src/lib.rs +++ b/oci_spec/src/lib.rs @@ -609,7 +609,8 @@ pub struct Spec { impl Spec { pub fn load>(path: P) -> Result { let path = path.as_ref(); - let file = File::open(path).with_context(|| format!("failed to open {:?}", path))?; + let file = + File::open(path).with_context(|| format!("load spec: failed to open {:?}", path))?; let spec: Spec = serde_json::from_reader(&file)?; Ok(spec) } diff --git a/src/delete.rs b/src/delete.rs index 2ddf0dc9e..0c35b6448 100644 --- a/src/delete.rs +++ b/src/delete.rs @@ -1,5 +1,4 @@ use std::fs; -use std::path::Path; use std::path::PathBuf; use anyhow::{bail, Result}; @@ -42,11 +41,7 @@ impl Delete { log::debug!("container status: {:?}", container.status()); if container.can_delete() { if container.root.exists() { - nix::unistd::chdir(&PathBuf::from(&container.state.bundle))?; - let config_absolute_path = &PathBuf::from(&container.state.bundle) - .join(Path::new("config.json")) - .to_string_lossy() - .to_string(); + let config_absolute_path = container.root.join("config.json"); log::debug!("load spec from {:?}", config_absolute_path); let spec = oci_spec::Spec::load(config_absolute_path)?; log::debug!("spec: {:?}", spec); From 560247cde3340619819d76086118ade27e7b2362 Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Sun, 11 Jul 2021 21:25:25 +0530 Subject: [PATCH 57/70] Document Capabilities and refactor its drop_privileges function --- docs/doc-draft.md | 4 ++++ src/capabilities.rs | 18 ++++++++---------- src/command/linux.rs | 28 ++++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/docs/doc-draft.md b/docs/doc-draft.md index 4abe5b428..a2a725269 100644 --- a/docs/doc-draft.md +++ b/docs/doc-draft.md @@ -92,3 +92,7 @@ This also 
provides implementation for Linux syscalls for the trait. [oci runtime specification]: https://github.com/opencontainers/runtime-spec/blob/master/runtime.md [runc man pages]: (https://github.com/opencontainers/runc/blob/master/man/runc.8.md) + +## Capabilities + +- [Simple explanation of capabilities](https://blog.container-solutions.com/linux-capabilities-in-practice) diff --git a/src/capabilities.rs b/src/capabilities.rs index 79088fdf3..051c882be 100644 --- a/src/capabilities.rs +++ b/src/capabilities.rs @@ -1,9 +1,11 @@ +//! Handles Management of Capabilities use crate::command::Syscall; use caps::*; use anyhow::Result; use oci_spec::{LinuxCapabilities, LinuxCapabilityType}; +/// Converts a list of capability types to capabilities has set fn to_set(caps: &[LinuxCapabilityType]) -> CapsHashSet { let mut capabilities = CapsHashSet::new(); for c in caps { @@ -12,29 +14,25 @@ fn to_set(caps: &[LinuxCapabilityType]) -> CapsHashSet { capabilities } +/// reset capabilities of process calling this to effective capabilities +/// effective capability set is set of capabilities used by kernel to perform checks +/// see https://man7.org/linux/man-pages/man7/capabilities.7.html for more information pub fn reset_effective(syscall: &impl Syscall) -> Result<()> { log::debug!("reset all caps"); syscall.set_capability(CapSet::Effective, &caps::all())?; Ok(()) } +/// Drop any extra granted capabilities, and reset to defaults which are in oci specification pub fn drop_privileges(cs: &LinuxCapabilities, syscall: &impl Syscall) -> Result<()> { - let all = caps::all(); log::debug!("dropping bounding capabilities to {:?}", cs.bounding); - for c in all.difference(&to_set(&cs.bounding)) { - match c { - Capability::CAP_PERFMON | Capability::CAP_CHECKPOINT_RESTORE | Capability::CAP_BPF => { - log::warn!("{:?} doesn't support.", c); - continue; - } - _ => caps::drop(None, CapSet::Bounding, *c)?, - } - } + syscall.set_capability(CapSet::Bounding, &to_set(&cs.bounding))?; 
syscall.set_capability(CapSet::Effective, &to_set(&cs.effective))?; syscall.set_capability(CapSet::Permitted, &to_set(&cs.permitted))?; syscall.set_capability(CapSet::Inheritable, &to_set(&cs.inheritable))?; + // check specifically for ambient, as those might not always be available if let Err(e) = syscall.set_capability(CapSet::Ambient, &to_set(&cs.ambient)) { log::error!("failed to set ambient capabilities: {}", e); } diff --git a/src/command/linux.rs b/src/command/linux.rs index e2cb86eea..cc055c172 100644 --- a/src/command/linux.rs +++ b/src/command/linux.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use std::{any::Any, mem, path::Path, ptr}; use anyhow::{bail, Result}; -use caps::{errors::CapsError, CapSet, CapsHashSet}; +use caps::{errors::CapsError, CapSet, Capability, CapsHashSet}; use libc::{c_char, uid_t}; use nix::{ errno::Errno, @@ -114,7 +114,31 @@ impl Syscall for LinuxSyscall { /// Set capabilities for container process fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<(), CapsError> { - caps::set(None, cset, value) + match cset { + // caps::set cannot set capabilities in bounding set, + // so we do it differently + CapSet::Bounding => { + // get all capabilities + let all = caps::all(); + // the difference will give capabilities + // which are to be unset + // for each such =, drop that capability + // after this, only those which are to be set will remain set + for c in all.difference(value) { + match c { + Capability::CAP_PERFMON + | Capability::CAP_CHECKPOINT_RESTORE + | Capability::CAP_BPF => { + log::warn!("{:?} is not supported.", c); + continue; + } + _ => caps::drop(None, CapSet::Bounding, *c)?, + } + } + Ok(()) + } + _ => caps::set(None, cset, value), + } } /// Sets hostname for process From d3e2f5746afa4d437191735603807b216fbbbdd8 Mon Sep 17 00:00:00 2001 From: Zheming Li Date: Mon, 12 Jul 2021 11:35:51 +0800 Subject: [PATCH 58/70] Fix same tmp dir in freezer v2 tests --- src/cgroups/v2/freezer.rs | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/freezer.rs b/src/cgroups/v2/freezer.rs index ef0643909..5827f813d 100644 --- a/src/cgroups/v2/freezer.rs +++ b/src/cgroups/v2/freezer.rs @@ -178,8 +178,8 @@ mod tests { #[test] fn test_set_freezer_state_error() { - let tmp = - create_temp_dir("test_set_freezer_state").expect("create temp directory for test"); + let tmp = create_temp_dir("test_set_freezer_state_error") + .expect("create temp directory for test"); set_fixture(&tmp, CGROUP_FREEZE, "").expect("Set fixure for freezer state"); set_fixture(&tmp, CGROUP_EVENTS, "").expect("Set fixure for freezer state"); From 21a7686b5345a67dc1db7d19a47172355c0bf017 Mon Sep 17 00:00:00 2001 From: 0xdco Date: Tue, 13 Jul 2021 00:26:48 +0200 Subject: [PATCH 59/70] cgroupsv2 hugetlb --- src/cgroups/v2/hugetlb.rs | 112 +++++++++++++++++++++++++++++++++++++- 1 file changed, 109 insertions(+), 3 deletions(-) diff --git a/src/cgroups/v2/hugetlb.rs b/src/cgroups/v2/hugetlb.rs index 9e1a8321b..1470962d7 100644 --- a/src/cgroups/v2/hugetlb.rs +++ b/src/cgroups/v2/hugetlb.rs @@ -1,12 +1,118 @@ -use anyhow::Result; +use anyhow::{bail, Result}; +use std::path::Path; use super::controller::Controller; -use oci_spec::LinuxResources; +use crate::cgroups::common; +use oci_spec::{LinuxHugepageLimit, LinuxResources}; +use regex::Regex; pub struct HugeTlb {} impl Controller for HugeTlb { - fn apply(_: &LinuxResources, _: &std::path::Path) -> Result<()> { + fn apply(linux_resources: &LinuxResources, cgroup_root: &std::path::Path) -> Result<()> { + log::debug!("Apply hugetlb cgroup v2 config"); + if let Some(hugepage_limits) = Self::needs_to_handle(linux_resources) { + for hugetlb in hugepage_limits { + Self::apply(cgroup_root, hugetlb)? 
+ } + } Ok(()) } } + +impl HugeTlb { + fn apply(root_path: &Path, hugetlb: &LinuxHugepageLimit) -> Result<()> { + let re = Regex::new(r"(?P[0-9]+)[KMG]B")?; + let caps = re.captures(&hugetlb.page_size); + match caps { + None => bail!("page size must be in the format [0-9]+[KMG]B"), + Some(caps) => { + let page_size: u64 = caps["pagesize"].parse()?; + if !Self::is_power_of_two(page_size) { + bail!("page size must be in the format of 2^(integer)"); + } + } + } + + common::write_cgroup_file( + root_path.join(format!("hugetlb.{}.limit_in_bytes", hugetlb.page_size)), + hugetlb.limit, + )?; + Ok(()) + } + + fn needs_to_handle(linux_resources: &LinuxResources) -> Option<&Vec> { + if !linux_resources.hugepage_limits.is_empty() { + return Some(&linux_resources.hugepage_limits); + } + + None + } + + fn is_power_of_two(number: u64) -> bool { + (number != 0) && (number & (number - 1)) == 0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; + use oci_spec::LinuxHugepageLimit; + use std::fs::read_to_string; + + #[test] + fn test_set_hugetlb() { + let page_file_name = "hugetlb.2MB.limit_in_bytes"; + let tmp = create_temp_dir("test_set_hugetlb").expect("create temp directory for test"); + set_fixture(&tmp, page_file_name, "0").expect("Set fixture for 2 MB page size"); + + let hugetlb = LinuxHugepageLimit { + page_size: "2MB".to_owned(), + limit: 16384, + }; + HugeTlb::apply(&tmp, &hugetlb).expect("apply hugetlb"); + let content = read_to_string(tmp.join(page_file_name)).expect("Read hugetlb file content"); + assert_eq!(hugetlb.limit.to_string(), content); + } + + #[test] + fn test_set_hugetlb_with_invalid_page_size() { + let tmp = create_temp_dir("test_set_hugetlb_with_invalid_page_size") + .expect("create temp directory for test"); + + let hugetlb = LinuxHugepageLimit { + page_size: "3MB".to_owned(), + limit: 16384, + }; + + let result = HugeTlb::apply(&tmp, &hugetlb); + assert!( + result.is_err(), + 
"page size that is not a power of two should be an error" + ); + } + + quickcheck! { + fn property_test_set_hugetlb(hugetlb: LinuxHugepageLimit) -> bool { + let page_file_name = format!("hugetlb.{:?}.limit_in_bytes", hugetlb.page_size); + let tmp = create_temp_dir("property_test_set_hugetlb").expect("create temp directory for test"); + set_fixture(&tmp, &page_file_name, "0").expect("Set fixture for page size"); + + let result = HugeTlb::apply(&tmp, &hugetlb); + + let re = Regex::new(r"(?P[0-9]+)[KMG]B").expect("create regex for parsing pagesize"); + let caps = re.captures(&hugetlb.page_size).expect("should capture pagesize"); + + let page_size: u64 = caps["pagesize"].parse().expect("should contain captured pagesize"); + if HugeTlb::is_power_of_two(page_size) && page_size != 1 { + let content = + read_to_string(tmp.join(page_file_name)).expect("Read hugetlb file content"); + hugetlb.limit.to_string() == content + } else { + result.is_err() + } + } + } +} From 4549309e213b841267d206748cc19b243408812e Mon Sep 17 00:00:00 2001 From: 0xdco Date: Tue, 13 Jul 2021 01:00:38 +0200 Subject: [PATCH 60/70] remove regex usage from hugetlb v2 --- src/cgroups/v2/hugetlb.rs | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/cgroups/v2/hugetlb.rs b/src/cgroups/v2/hugetlb.rs index 1470962d7..585a68fbf 100644 --- a/src/cgroups/v2/hugetlb.rs +++ b/src/cgroups/v2/hugetlb.rs @@ -4,7 +4,6 @@ use std::path::Path; use super::controller::Controller; use crate::cgroups::common; use oci_spec::{LinuxHugepageLimit, LinuxResources}; -use regex::Regex; pub struct HugeTlb {} @@ -22,16 +21,14 @@ impl Controller for HugeTlb { impl HugeTlb { fn apply(root_path: &Path, hugetlb: &LinuxHugepageLimit) -> Result<()> { - let re = Regex::new(r"(?P[0-9]+)[KMG]B")?; - let caps = re.captures(&hugetlb.page_size); - match caps { - None => bail!("page size must be in the format [0-9]+[KMG]B"), - Some(caps) => { - let page_size: u64 = caps["pagesize"].parse()?; 
- if !Self::is_power_of_two(page_size) { - bail!("page size must be in the format of 2^(integer)"); - } - } + let page_size: String = hugetlb + .page_size + .chars() + .take_while(|c| c.is_digit(10)) + .collect(); + let page_size: u64 = page_size.parse()?; + if !Self::is_power_of_two(page_size) { + bail!("page size must be in the format of 2^(integer)"); } common::write_cgroup_file( @@ -97,15 +94,17 @@ mod tests { quickcheck! { fn property_test_set_hugetlb(hugetlb: LinuxHugepageLimit) -> bool { let page_file_name = format!("hugetlb.{:?}.limit_in_bytes", hugetlb.page_size); - let tmp = create_temp_dir("property_test_set_hugetlb").expect("create temp directory for test"); + let tmp = create_temp_dir("property_test_set_hugetlbv2").expect("create temp directory for test"); set_fixture(&tmp, &page_file_name, "0").expect("Set fixture for page size"); - let result = HugeTlb::apply(&tmp, &hugetlb); - let re = Regex::new(r"(?P[0-9]+)[KMG]B").expect("create regex for parsing pagesize"); - let caps = re.captures(&hugetlb.page_size).expect("should capture pagesize"); + let page_size: String = hugetlb + .page_size + .chars() + .take_while(|c| c.is_digit(10)) + .collect(); + let page_size: u64 = page_size.parse().expect("parse page size"); - let page_size: u64 = caps["pagesize"].parse().expect("should contain captured pagesize"); if HugeTlb::is_power_of_two(page_size) && page_size != 1 { let content = read_to_string(tmp.join(page_file_name)).expect("Read hugetlb file content"); From c1686c64f0c89deeb952cb7cd17b36c9d419c006 Mon Sep 17 00:00:00 2001 From: 0xdco Date: Tue, 13 Jul 2021 01:29:13 +0200 Subject: [PATCH 61/70] use different temp dir for hugetlbv2 tests --- src/cgroups/v2/hugetlb.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/hugetlb.rs b/src/cgroups/v2/hugetlb.rs index 585a68fbf..fe0cf9dab 100644 --- a/src/cgroups/v2/hugetlb.rs +++ b/src/cgroups/v2/hugetlb.rs @@ -62,7 +62,7 @@ mod tests { #[test] fn test_set_hugetlb() { let 
page_file_name = "hugetlb.2MB.limit_in_bytes"; - let tmp = create_temp_dir("test_set_hugetlb").expect("create temp directory for test"); + let tmp = create_temp_dir("test_set_hugetlbv2").expect("create temp directory for test"); set_fixture(&tmp, page_file_name, "0").expect("Set fixture for 2 MB page size"); let hugetlb = LinuxHugepageLimit { @@ -76,7 +76,7 @@ mod tests { #[test] fn test_set_hugetlb_with_invalid_page_size() { - let tmp = create_temp_dir("test_set_hugetlb_with_invalid_page_size") + let tmp = create_temp_dir("test_set_hugetlbv2_with_invalid_page_size") .expect("create temp directory for test"); let hugetlb = LinuxHugepageLimit { From 6be676969a0e7130366757e5e690e9477493e29f Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Tue, 13 Jul 2021 11:33:25 +0530 Subject: [PATCH 62/70] Document Info module --- src/info.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/info.rs b/src/info.rs index 087b7bbee..ae585565c 100644 --- a/src/info.rs +++ b/src/info.rs @@ -1,3 +1,4 @@ +//! 
Contains functions related to printing information about system running Youki use std::{fs, path::Path}; use anyhow::Result; @@ -21,10 +22,12 @@ impl Info { } } +/// print Version of Youki pub fn print_youki() { println!("{:<18}{}", "Version", env!("CARGO_PKG_VERSION")); } +/// Print Kernel Release, Version and Architecture pub fn print_kernel() { let uname = nix::sys::utsname::uname(); println!("{:<18}{}", "Kernel-Release", uname.release()); @@ -32,6 +35,7 @@ pub fn print_kernel() { println!("{:<18}{}", "Architecture", uname.machine()); } +/// Prints OS Distribution information // see https://www.freedesktop.org/software/systemd/man/os-release.html pub fn print_os() { if let Some(os) = try_read_os_from("/etc/os-release") { @@ -41,6 +45,7 @@ pub fn print_os() { } } +/// Helper function to read the OS Distribution info fn try_read_os_from>(path: P) -> Option { let os_release = path.as_ref(); if !os_release.exists() { @@ -69,6 +74,7 @@ fn try_read_os_from>(path: P) -> Option { None } +/// Helper function to find keyword values in OS info string fn find_parameter<'a>(content: &'a str, param_name: &str) -> Option<&'a str> { let param_value = content .lines() @@ -82,6 +88,7 @@ fn find_parameter<'a>(content: &'a str, param_name: &str) -> Option<&'a str> { None } +/// Print Hardware information of system pub fn print_hardware() { if let Ok(cpu_info) = CpuInfo::new() { println!("{:<18}{}", "Cores", cpu_info.num_cores()); @@ -96,6 +103,7 @@ pub fn print_hardware() { } } +/// Print cgroups info of system pub fn print_cgroups() { if let Ok(cgroup_fs) = cgroups::common::get_supported_cgroup_fs() { let cgroup_fs: Vec = cgroup_fs.into_iter().map(|c| c.to_string()).collect(); From e5119a0ca13fc664c0704698062a8a2589b12fe9 Mon Sep 17 00:00:00 2001 From: Yashodhan Joshi Date: Tue, 13 Jul 2021 11:37:32 +0530 Subject: [PATCH 63/70] Update doc-draft.md --- docs/doc-draft.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/doc-draft.md b/docs/doc-draft.md index 
a2a725269..c8dfd7d40 100644 --- a/docs/doc-draft.md +++ b/docs/doc-draft.md @@ -95,4 +95,13 @@ This also provides implementation for Linux syscalls for the trait. ## Capabilities +This has functions related to set and reset specific capabilities, as well as to drop extra privileges + - [Simple explanation of capabilities](https://blog.container-solutions.com/linux-capabilities-in-practice) +- [man page for capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html) + +## Info + +This is primarily for printing info about system running youki, such as OS release, architecture, cpu info, cgroups info etc. , as this info can be helpful when reporting issues. + +- [about /etc/os-release](https://www.freedesktop.org/software/systemd/man/os-release.html) From ac7d1c0edd7a30a256c5423cf7a0beedc6e061d8 Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Wed, 14 Jul 2021 22:30:44 -0600 Subject: [PATCH 64/70] systemd dependency as a build feature --- Cargo.toml | 7 +++++-- src/cgroups/common.rs | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ee1892a3d..8c8b09a0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ authors = ["utam0k "] edition = "2018" description = "A container runtime written in Rust" +[features] +systemd_cgroups = ["systemd"] + [dependencies.clap] version = "3.0.0-beta.2" default-features = false @@ -25,7 +28,7 @@ chrono = { version="0.4", features = ["serde"] } once_cell = "1.6.0" futures = { version = "0.3", features = ["thread-pool"] } oci_spec = { version = "0.1.0", path = "./oci_spec" } -systemd = { version = "0.8", default-features = false } +systemd = { version = "0.8", default-features = false, optional = true } dbus = "0.9.2" tabwriter = "1" @@ -35,4 +38,4 @@ quickcheck = "1" serial_test = "0.5.1" [profile.release] -lto = true \ No newline at end of file +lto = true diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index 20fd04101..a035f795a 100644 --- 
a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -10,7 +10,12 @@ use anyhow::{bail, Context, Result}; use nix::unistd::Pid; use oci_spec::LinuxResources; use procfs::process::Process; +#[cfg(feature = "systemd_cgroups")] use systemd::daemon::booted; +#[cfg(not(feature = "systemd_cgroups"))] +fn booted() -> Result { + Ok(false) +} use crate::cgroups::v1; use crate::cgroups::v2; From 4cbdbdc9af1102c5e6484ef525c4cf797e63aa4f Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Wed, 14 Jul 2021 22:30:44 -0600 Subject: [PATCH 65/70] systemd dependency as a build feature --- Cargo.toml | 7 +++++-- src/cgroups/common.rs | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ee1892a3d..8c8b09a0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,9 @@ authors = ["utam0k "] edition = "2018" description = "A container runtime written in Rust" +[features] +systemd_cgroups = ["systemd"] + [dependencies.clap] version = "3.0.0-beta.2" default-features = false @@ -25,7 +28,7 @@ chrono = { version="0.4", features = ["serde"] } once_cell = "1.6.0" futures = { version = "0.3", features = ["thread-pool"] } oci_spec = { version = "0.1.0", path = "./oci_spec" } -systemd = { version = "0.8", default-features = false } +systemd = { version = "0.8", default-features = false, optional = true } dbus = "0.9.2" tabwriter = "1" @@ -35,4 +38,4 @@ quickcheck = "1" serial_test = "0.5.1" [profile.release] -lto = true \ No newline at end of file +lto = true diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index 20fd04101..a035f795a 100644 --- a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -10,7 +10,12 @@ use anyhow::{bail, Context, Result}; use nix::unistd::Pid; use oci_spec::LinuxResources; use procfs::process::Process; +#[cfg(feature = "systemd_cgroups")] use systemd::daemon::booted; +#[cfg(not(feature = "systemd_cgroups"))] +fn booted() -> Result { + Ok(false) +} use crate::cgroups::v1; use crate::cgroups::v2; From 
287de24228d08be391d519378e65156cf68b5917 Mon Sep 17 00:00:00 2001 From: tsturzl Date: Thu, 17 Jun 2021 21:22:54 -0600 Subject: [PATCH 66/70] time to test --- src/cgroups/v2/memory.rs | 122 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 2 deletions(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index a83f7391f..f4b7771ab 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -1,10 +1,16 @@ -use anyhow::Result; +use anyhow::{Result, *}; use std::path::Path; use oci_spec::{LinuxMemory, LinuxResources}; +use crate::cgroups::common; + use super::controller::Controller; +const CGROUP_MEMORY_SWAP: &str = "memory.swap.max"; +const CGROUP_MEMORY_MAX: &str = "memory.max"; +const CGROUP_MEMORY_LOW: &str = "memory.low"; + pub struct Memory {} impl Controller for Memory { @@ -18,7 +24,119 @@ impl Controller for Memory { } impl Memory { - fn apply(_: &Path, _: &LinuxMemory) -> Result<()> { + fn set>(path: P, val: i64) -> Result<()> { + if val == 0 { + Ok(()) + } else if val == -1 { + common::write_cgroup_file_str(path, "max") + } else { + common::write_cgroup_file(path, val) + } + } + + fn apply(path: &Path, memory: &LinuxMemory) -> Result<()> { + // if nothing is set just exit right away + if memory.reservation.is_none() && memory.limit.is_none() && memory.swap.is_none() { + return Ok(()); + } + + match memory.limit { + Some(limit) if limit < -1 => { + bail!("invalid memory value: {}", limit); + } + Some(limit) => match memory.swap { + Some(swap) if swap < -1 => { + bail!("invalid swap value: {}", swap); + } + Some(swap) => { + Memory::set(path.join(CGROUP_MEMORY_SWAP), swap)?; + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + None => { + if limit == -1 { + Memory::set(path.join(CGROUP_MEMORY_SWAP), -1)?; + } + Memory::set(path.join(CGROUP_MEMORY_MAX), limit)?; + } + }, + None => { + if memory.swap.is_some() { + bail!("unsable to set swap limit without memory limit"); + } + } + }; + + if let 
Some(reservation) = memory.reservation { + if reservation < -1 { + bail!("invalid memory reservation value: {}", reservation); + } + Memory::set(path.join(CGROUP_MEMORY_LOW), reservation)?; + } + Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::cgroups::test::{create_temp_dir, set_fixture}; + use oci_spec::LinuxMemory; + use std::fs::read_to_string; + + #[test] + fn test_set_memory_v2() { + let tmp = create_temp_dir("test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let limit = 1024; + let reservation = 512; + let swap = 2048; + let memory_limits = &LinuxMemory { + limit: Some(limit), + reservation: Some(reservation), + swap: Some(swap), + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, limit.to_string()); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, swap.to_string()); + + let reservation_content = + read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation"); + assert_eq!(reservation_content, reservation.to_string()); + } + + #[test] + fn test_set_memory_unlimited_v2() { + let tmp = create_temp_dir("test_set_memory_unlimited_v2") + .expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-1), 
+ reservation: None, + swap: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + Memory::apply(&tmp, memory_limits).expect("apply memory limits"); + + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit"); + assert_eq!(limit_content, "max"); + + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); + assert_eq!(swap_content, "max"); + } +} From 1efdefc42dd5e7d9a32fef75527945b9c4d1427d Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Thu, 17 Jun 2021 22:21:36 -0600 Subject: [PATCH 67/70] hella tests --- src/cgroups/v2/memory.rs | 135 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index f4b7771ab..df00bbf49 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -61,7 +61,7 @@ impl Memory { }, None => { if memory.swap.is_some() { - bail!("unsable to set swap limit without memory limit"); + bail!("unable to set swap limit without memory limit"); } } }; @@ -139,4 +139,137 @@ mod tests { let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); assert_eq!(swap_content, "max"); } + + #[test] + fn test_err_swap_no_memory_v2() { + let tmp = + create_temp_dir("test_err_swap_no_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: None, + swap: Some(512), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_limit_v2() { + let tmp = create_temp_dir("test_err_bad_limit_v2").expect("create 
temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-2), + swap: None, + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_swap_v2() { + let tmp = create_temp_dir("test_err_bad_swap_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(512), + swap: Some(-3), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + quickcheck! 
{ + fn property_test_set_memory_v2(linux_memory: LinuxMemory) -> bool { + let tmp = create_temp_dir("property_test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let result = Memory::apply(&tmp, &linux_memory); + + // we need to check for expected errors first and foremost or we'll get false negatives + // later + if let Some(limit) = linux_memory.limit { + if limit < -1 { + return result.is_err(); + } + } + + if let Some(swap) = linux_memory.swap { + if swap < -1 { + return result.is_err(); + } + if linux_memory.limit.is_none() { + return result.is_err(); + } + } + + if let Some(reservation) = linux_memory.reservation { + if reservation < -1 { + return result.is_err(); + } + } + + // check the limit file is set as expected + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit to string"); + let limit_check = match linux_memory.limit { + Some(limit) if limit == -1 => limit_content == "max", + Some(limit) => limit_content == limit.to_string(), + None => limit_content == "0", + }; + + // check the swap file is set as expected + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit to string"); + let swap_check = match linux_memory.swap { + Some(swap) if swap == -1 => swap_content == "max", + Some(swap) => swap_content == swap.to_string(), + None => { + match linux_memory.limit { + Some(limit) if limit == -1 => swap_content == "max", + _ => swap_content == "0", + } + } + }; + + + // check the resevation file is set as expected + let reservation_content = read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation to string"); + let reservation_check = match linux_memory.reservation { + Some(reservation) if 
reservation == -1 => reservation_content == "max", + Some(reservation) => reservation_content == reservation.to_string(), + None => reservation_content == "0", + }; + + println!("limit_check: {}", limit_check); + println!("swap_check: {}", swap_check); + println!("reservation_check: {}", reservation_check); + limit_check && swap_check && reservation_check + } + } } From 314a7a9b23b37985ac11baec29fc842a1de603c8 Mon Sep 17 00:00:00 2001 From: tsturzl Date: Thu, 17 Jun 2021 21:22:54 -0600 Subject: [PATCH 68/70] time to test --- src/cgroups/v2/memory.rs | 135 +-------------------------------------- 1 file changed, 1 insertion(+), 134 deletions(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index df00bbf49..f4b7771ab 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -61,7 +61,7 @@ impl Memory { }, None => { if memory.swap.is_some() { - bail!("unable to set swap limit without memory limit"); + bail!("unsable to set swap limit without memory limit"); } } }; @@ -139,137 +139,4 @@ mod tests { let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); assert_eq!(swap_content, "max"); } - - #[test] - fn test_err_swap_no_memory_v2() { - let tmp = - create_temp_dir("test_err_swap_no_memory_v2").expect("create temp directory for test"); - set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); - set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); - set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); - - let memory_limits = &LinuxMemory { - limit: None, - swap: Some(512), - reservation: None, - kernel: None, - kernel_tcp: None, - swappiness: None, - }; - - let result = Memory::apply(&tmp, memory_limits); - - assert!(result.is_err()); - } - - #[test] - fn test_err_bad_limit_v2() { - let tmp = create_temp_dir("test_err_bad_limit_v2").expect("create temp directory for test"); - set_fixture(&tmp, CGROUP_MEMORY_MAX, 
"0").expect("set fixture for memory limit"); - set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); - set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); - - let memory_limits = &LinuxMemory { - limit: Some(-2), - swap: None, - reservation: None, - kernel: None, - kernel_tcp: None, - swappiness: None, - }; - - let result = Memory::apply(&tmp, memory_limits); - - assert!(result.is_err()); - } - - #[test] - fn test_err_bad_swap_v2() { - let tmp = create_temp_dir("test_err_bad_swap_v2").expect("create temp directory for test"); - set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); - set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); - set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); - - let memory_limits = &LinuxMemory { - limit: Some(512), - swap: Some(-3), - reservation: None, - kernel: None, - kernel_tcp: None, - swappiness: None, - }; - - let result = Memory::apply(&tmp, memory_limits); - - assert!(result.is_err()); - } - - quickcheck! 
{ - fn property_test_set_memory_v2(linux_memory: LinuxMemory) -> bool { - let tmp = create_temp_dir("property_test_set_memory_v2").expect("create temp directory for test"); - set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); - set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); - set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); - - let result = Memory::apply(&tmp, &linux_memory); - - // we need to check for expected errors first and foremost or we'll get false negatives - // later - if let Some(limit) = linux_memory.limit { - if limit < -1 { - return result.is_err(); - } - } - - if let Some(swap) = linux_memory.swap { - if swap < -1 { - return result.is_err(); - } - if linux_memory.limit.is_none() { - return result.is_err(); - } - } - - if let Some(reservation) = linux_memory.reservation { - if reservation < -1 { - return result.is_err(); - } - } - - // check the limit file is set as expected - let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit to string"); - let limit_check = match linux_memory.limit { - Some(limit) if limit == -1 => limit_content == "max", - Some(limit) => limit_content == limit.to_string(), - None => limit_content == "0", - }; - - // check the swap file is set as expected - let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit to string"); - let swap_check = match linux_memory.swap { - Some(swap) if swap == -1 => swap_content == "max", - Some(swap) => swap_content == swap.to_string(), - None => { - match linux_memory.limit { - Some(limit) if limit == -1 => swap_content == "max", - _ => swap_content == "0", - } - } - }; - - - // check the resevation file is set as expected - let reservation_content = read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation to string"); - let reservation_check = match linux_memory.reservation { - Some(reservation) if 
reservation == -1 => reservation_content == "max", - Some(reservation) => reservation_content == reservation.to_string(), - None => reservation_content == "0", - }; - - println!("limit_check: {}", limit_check); - println!("swap_check: {}", swap_check); - println!("reservation_check: {}", reservation_check); - limit_check && swap_check && reservation_check - } - } } From 8cba204f6d9701487a851dadc34120ab327fb8ae Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Thu, 17 Jun 2021 22:21:36 -0600 Subject: [PATCH 69/70] hella tests --- src/cgroups/v2/memory.rs | 135 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index f4b7771ab..df00bbf49 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -61,7 +61,7 @@ impl Memory { }, None => { if memory.swap.is_some() { - bail!("unsable to set swap limit without memory limit"); + bail!("unable to set swap limit without memory limit"); } } }; @@ -139,4 +139,137 @@ mod tests { let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit"); assert_eq!(swap_content, "max"); } + + #[test] + fn test_err_swap_no_memory_v2() { + let tmp = + create_temp_dir("test_err_swap_no_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: None, + swap: Some(512), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_limit_v2() { + let tmp = create_temp_dir("test_err_bad_limit_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, 
"0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(-2), + swap: None, + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + #[test] + fn test_err_bad_swap_v2() { + let tmp = create_temp_dir("test_err_bad_swap_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let memory_limits = &LinuxMemory { + limit: Some(512), + swap: Some(-3), + reservation: None, + kernel: None, + kernel_tcp: None, + swappiness: None, + }; + + let result = Memory::apply(&tmp, memory_limits); + + assert!(result.is_err()); + } + + quickcheck! 
{ + fn property_test_set_memory_v2(linux_memory: LinuxMemory) -> bool { + let tmp = create_temp_dir("property_test_set_memory_v2").expect("create temp directory for test"); + set_fixture(&tmp, CGROUP_MEMORY_MAX, "0").expect("set fixture for memory limit"); + set_fixture(&tmp, CGROUP_MEMORY_LOW, "0").expect("set fixture for memory reservation"); + set_fixture(&tmp, CGROUP_MEMORY_SWAP, "0").expect("set fixture for swap limit"); + + let result = Memory::apply(&tmp, &linux_memory); + + // we need to check for expected errors first and foremost or we'll get false negatives + // later + if let Some(limit) = linux_memory.limit { + if limit < -1 { + return result.is_err(); + } + } + + if let Some(swap) = linux_memory.swap { + if swap < -1 { + return result.is_err(); + } + if linux_memory.limit.is_none() { + return result.is_err(); + } + } + + if let Some(reservation) = linux_memory.reservation { + if reservation < -1 { + return result.is_err(); + } + } + + // check the limit file is set as expected + let limit_content = read_to_string(tmp.join(CGROUP_MEMORY_MAX)).expect("read memory limit to string"); + let limit_check = match linux_memory.limit { + Some(limit) if limit == -1 => limit_content == "max", + Some(limit) => limit_content == limit.to_string(), + None => limit_content == "0", + }; + + // check the swap file is set as expected + let swap_content = read_to_string(tmp.join(CGROUP_MEMORY_SWAP)).expect("read swap limit to string"); + let swap_check = match linux_memory.swap { + Some(swap) if swap == -1 => swap_content == "max", + Some(swap) => swap_content == swap.to_string(), + None => { + match linux_memory.limit { + Some(limit) if limit == -1 => swap_content == "max", + _ => swap_content == "0", + } + } + }; + + + // check the resevation file is set as expected + let reservation_content = read_to_string(tmp.join(CGROUP_MEMORY_LOW)).expect("read memory reservation to string"); + let reservation_check = match linux_memory.reservation { + Some(reservation) if 
reservation == -1 => reservation_content == "max", + Some(reservation) => reservation_content == reservation.to_string(), + None => reservation_content == "0", + }; + + println!("limit_check: {}", limit_check); + println!("swap_check: {}", swap_check); + println!("reservation_check: {}", reservation_check); + limit_check && swap_check && reservation_check + } + } } From db0b203a0c9d25e79f8e022efbca83f3f025d3a4 Mon Sep 17 00:00:00 2001 From: Travis Sturzl Date: Fri, 16 Jul 2021 10:17:41 -0600 Subject: [PATCH 70/70] libsystemd is optional for builds and runtime errors if you try to use --- Cargo.toml | 1 + src/cgroups/common.rs | 2 +- src/cgroups/v2/memory.rs | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c8b09a0b..f734a094b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" description = "A container runtime written in Rust" [features] +default = ["systemd_cgroups"] systemd_cgroups = ["systemd"] [dependencies.clap] diff --git a/src/cgroups/common.rs b/src/cgroups/common.rs index a035f795a..6e35813eb 100644 --- a/src/cgroups/common.rs +++ b/src/cgroups/common.rs @@ -14,7 +14,7 @@ use procfs::process::Process; use systemd::daemon::booted; #[cfg(not(feature = "systemd_cgroups"))] fn booted() -> Result { - Ok(false) + bail!("This build does not include the systemd cgroups feature") } use crate::cgroups::v1; diff --git a/src/cgroups/v2/memory.rs b/src/cgroups/v2/memory.rs index df00bbf49..24f6884af 100644 --- a/src/cgroups/v2/memory.rs +++ b/src/cgroups/v2/memory.rs @@ -80,7 +80,8 @@ impl Memory { #[cfg(test)] mod tests { use super::*; - use crate::cgroups::test::{create_temp_dir, set_fixture}; + use crate::cgroups::test::set_fixture; + use crate::utils::create_temp_dir; use oci_spec::LinuxMemory; use std::fs::read_to_string;