diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 81d25a77f..66f7fb365 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -45,7 +45,7 @@ jobs:
           expect_err_num=8
           act_err_num=0
           cd $(go env GOPATH)/src/github.com/opencontainers/runtime-tools
-          test_cases=("default/default.t" "linux_cgroups_devices/linux_cgroups_devices.t" "linux_cgroups_hugetlb/linux_cgroups_hugetlb.t" "linux_cgroups_pids/linux_cgroups_pids.t")
+          test_cases=("default/default.t" "linux_cgroups_devices/linux_cgroups_devices.t" "linux_cgroups_hugetlb/linux_cgroups_hugetlb.t" "linux_cgroups_pids/linux_cgroups_pids.t" "linux_cgroups_memory/linux_cgroups_memory.t")
           for case in "${test_cases[@]}"; do
             title="Running $case"
             if [ 0 -ne $(sudo RUNTIME=$GITHUB_WORKSPACE/target/x86_64-unknown-linux-gnu/debug/youki ./validation/$case | grep "not ok" | wc -l) ]; then
diff --git a/.gitignore b/.gitignore
index 58b204af4..9039ea945 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,6 @@
 /target
 .vagrant/
+
+tags
+tags.lock
+tags.temp
diff --git a/Cargo.toml b/Cargo.toml
index 6a9b7e598..eb8f2e502 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,4 +19,4 @@ mio = { version = "0.7", features = ["os-ext", "os-poll"] }
 chrono = "0.4"
 once_cell = "1.6.0"
 futures = { version = "0.3", features = ["thread-pool"] }
-regex = "1.5"
\ No newline at end of file
+regex = "1.5"
diff --git a/src/cgroups/controller_type.rs b/src/cgroups/controller_type.rs
index 744356b9b..d16f1a9ea 100644
--- a/src/cgroups/controller_type.rs
+++ b/src/cgroups/controller_type.rs
@@ -4,6 +4,7 @@ pub enum ControllerType {
     Devices,
     HugeTlb,
     Pids,
+    Memory,
 }
 
 impl ToString for ControllerType {
@@ -12,6 +13,7 @@ impl ToString for ControllerType {
             Self::Devices => "devices".into(),
             Self::HugeTlb => "hugetlb".into(),
             Self::Pids => "pids".into(),
+            Self::Memory => "memory".into(),
         }
     }
 }
diff --git a/src/cgroups/manager.rs b/src/cgroups/manager.rs
index 38cb29fda..1215ec0d8 100644
--- a/src/cgroups/manager.rs
+++ b/src/cgroups/manager.rs
@@ -7,13 +7,15 @@ use procfs::process::Process;
 
 use crate::{cgroups::ControllerType, spec::LinuxResources, utils::PathBufExt};
 
-use super::{devices::Devices, hugetlb::Hugetlb, pids::Pids, Controller};
+use super::{devices::Devices, hugetlb::Hugetlb, memory::Memory, pids::Pids, Controller};
 
 const CONTROLLERS: &[ControllerType] = &[
     ControllerType::Devices,
     ControllerType::HugeTlb,
+    ControllerType::Memory,
     ControllerType::Pids,
 ];
+
 pub struct Manager {
     subsystems: HashMap<String, PathBuf>,
 }
@@ -36,6 +38,7 @@ impl Manager {
             match subsys.0.as_str() {
                 "devices" => Devices::apply(linux_resources, &subsys.1, pid)?,
                 "hugetlb" => Hugetlb::apply(linux_resources, &subsys.1, pid)?,
+                "memory" => Memory::apply(linux_resources, &subsys.1, pid)?,
                 "pids" => Pids::apply(linux_resources, &subsys.1, pid)?,
                 _ => continue,
             }
@@ -59,17 +62,13 @@ impl Manager {
         let mount = Process::myself()?
             .mountinfo()?
             .into_iter()
-            .filter(|m| m.fs_type == "cgroup" && m.mount_point.ends_with(subsystem))
-            .collect::<Vec<_>>()
-            .pop()
+            .find(|m| m.fs_type == "cgroup" && m.mount_point.ends_with(subsystem))
             .unwrap();
 
         let cgroup = Process::myself()?
             .cgroups()?
             .into_iter()
-            .filter(|c| c.controllers.contains(&subsystem.to_owned()))
-            .collect::<Vec<_>>()
-            .pop()
+            .find(|c| c.controllers.contains(&subsystem.to_owned()))
             .unwrap();
 
         let p = if cgroup_path.to_string_lossy().into_owned().is_empty() {
diff --git a/src/cgroups/memory.rs b/src/cgroups/memory.rs
new file mode 100644
index 000000000..8cc5805e3
--- /dev/null
+++ b/src/cgroups/memory.rs
@@ -0,0 +1,361 @@
+use std::io::{prelude::*, Write};
+use std::{
+    fs::{create_dir_all, OpenOptions},
+    path::Path,
+};
+
+use anyhow::{Result, *};
+use nix::{errno::Errno, unistd::Pid};
+
+use crate::{
+    cgroups::Controller,
+    spec::{LinuxMemory, LinuxResources},
+};
+
+const CGROUP_MEMORY_SWAP_LIMIT: &str = "memory.memsw.limit_in_bytes";
+const CGROUP_MEMORY_LIMIT: &str = "memory.limit_in_bytes";
+const CGROUP_MEMORY_USAGE: &str = "memory.usage_in_bytes";
+const CGROUP_MEMORY_MAX_USAGE: &str = "memory.max_usage_in_bytes";
+const CGROUP_MEMORY_SWAPPINESS: &str = "memory.swappiness";
+const CGROUP_MEMORY_RESERVATION: &str = "memory.soft_limit_in_bytes";
+const CGROUP_MEMORY_OOM_CONTROL: &str = "memory.oom_control";
+
+const CGROUP_KERNEL_MEMORY_LIMIT: &str = "memory.kmem.limit_in_bytes";
+const CGROUP_KERNEL_TCP_MEMORY_LIMIT: &str = "memory.kmem.tcp.limit_in_bytes";
+
+/// Controller for the cgroup `memory` subsystem.
+pub struct Memory {}
+
+impl Controller for Memory {
+    /// Writes the memory settings found in `linux_resources` into the cgroup
+    /// directory `cgroup_root`, then appends `pid` to its `cgroup.procs`.
+    fn apply(linux_resources: &LinuxResources, cgroup_root: &Path, pid: Pid) -> Result<()> {
+        log::info!(
+            "Memory controller path: {}",
+            cgroup_root.to_str().unwrap_or("")
+        );
+        create_dir_all(cgroup_root)?;
+
+        if let Some(memory) = &linux_resources.memory {
+            let reservation = memory.reservation.unwrap_or(0);
+
+            Self::apply(memory, cgroup_root)?;
+
+            if reservation != 0 {
+                Self::set(reservation, &cgroup_root.join(CGROUP_MEMORY_RESERVATION))?;
+            }
+
+            // Per the kernel's cgroup-v1 memory documentation, writing 1 to
+            // memory.oom_control disables the OOM killer and 0 enables it.
+            if linux_resources.disable_oom_killer {
+                Self::set(1, &cgroup_root.join(CGROUP_MEMORY_OOM_CONTROL))?;
+            } else {
+                Self::set(0, &cgroup_root.join(CGROUP_MEMORY_OOM_CONTROL))?;
+            }
+
+            if let Some(swappiness) = memory.swappiness {
+                if swappiness <= 100 {
+                    Self::set(swappiness, &cgroup_root.join(CGROUP_MEMORY_SWAPPINESS))?;
+                } else {
+                    // invalid swappiness value
+                    return Err(anyhow!(
+                        "Invalid swappiness value: {}. Valid range is 0-100",
+                        swappiness
+                    ));
+                }
+            }
+
+            // NOTE: Seems as though kernel and kernelTCP are both deprecated
+            // neither are implemented by runc. Tests pass without this, but
+            // kept in per the spec.
+            if let Some(kmem) = memory.kernel {
+                Self::set(kmem, &cgroup_root.join(CGROUP_KERNEL_MEMORY_LIMIT))?;
+            }
+            if let Some(tcp_mem) = memory.kernel_tcp {
+                Self::set(tcp_mem, &cgroup_root.join(CGROUP_KERNEL_TCP_MEMORY_LIMIT))?;
+            }
+
+            OpenOptions::new()
+                .create(false)
+                .write(true)
+                .truncate(false)
+                .open(cgroup_root.join("cgroup.procs"))?
+                .write_all(pid.to_string().as_bytes())?;
+        }
+        Ok(())
+    }
+}
+
+impl Memory {
+    /// Reads `memory.usage_in_bytes`; the literal "max" maps to `u64::MAX`.
+    fn get_memory_usage(cgroup_root: &Path) -> Result<u64> {
+        let path = cgroup_root.join(CGROUP_MEMORY_USAGE);
+        let mut contents = String::new();
+        OpenOptions::new()
+            .create(false)
+            .read(true)
+            .open(path)?
+            .read_to_string(&mut contents)?;
+
+        contents = contents.trim().to_string();
+
+        if contents == "max" {
+            return Ok(u64::MAX);
+        }
+
+        let val = contents.parse::<u64>()?;
+        Ok(val)
+    }
+
+    /// Reads `memory.max_usage_in_bytes`; the literal "max" maps to `u64::MAX`.
+    fn get_memory_max_usage(cgroup_root: &Path) -> Result<u64> {
+        let path = cgroup_root.join(CGROUP_MEMORY_MAX_USAGE);
+        let mut contents = String::new();
+        OpenOptions::new()
+            .create(false)
+            .read(true)
+            .open(path)?
+            .read_to_string(&mut contents)?;
+
+        contents = contents.trim().to_string();
+
+        if contents == "max" {
+            return Ok(u64::MAX);
+        }
+
+        let val = contents.parse::<u64>()?;
+        Ok(val)
+    }
+
+    /// Reads `memory.limit_in_bytes`; the literal "max" maps to `i64::MAX`.
+    fn get_memory_limit(cgroup_root: &Path) -> Result<i64> {
+        let path = cgroup_root.join(CGROUP_MEMORY_LIMIT);
+        let mut contents = String::new();
+        OpenOptions::new()
+            .create(false)
+            .read(true)
+            .open(path)?
+            .read_to_string(&mut contents)?;
+
+        contents = contents.trim().to_string();
+
+        if contents == "max" {
+            return Ok(i64::MAX);
+        }
+
+        let val = contents.parse::<i64>()?;
+        Ok(val)
+    }
+
+    /// Overwrites the already-existing file at `path` with `val.to_string()`.
+    fn set<T: ToString>(val: T, path: &Path) -> std::io::Result<()> {
+        OpenOptions::new()
+            .create(false)
+            .write(true)
+            .truncate(true)
+            .open(path)?
+            .write_all(val.to_string().as_bytes())?;
+        Ok(())
+    }
+
+    /// Writes the memory limit; `0` is what `apply` passes for "not set" and is skipped.
+    fn set_memory(val: i64, cgroup_root: &Path) -> Result<()> {
+        if val == 0 {
+            return Ok(());
+        }
+
+        let path = cgroup_root.join(CGROUP_MEMORY_LIMIT);
+
+        match Self::set(val, &path) {
+            Ok(_) => Ok(()),
+            Err(e) => {
+                // we need to look into the raw OS error for an EBUSY status
+                match e.raw_os_error() {
+                    Some(code) => match Errno::from_i32(code) {
+                        Errno::EBUSY => {
+                            let usage = Self::get_memory_usage(cgroup_root)?;
+                            let max_usage = Self::get_memory_max_usage(cgroup_root)?;
+                            Err(anyhow!(
+                                "unable to set memory limit to {} (current usage: {}, peak usage: {})",
+                                val,
+                                usage,
+                                max_usage,
+                            ))
+                        }
+                        _ => Err(anyhow!(e)),
+                    },
+                    None => Err(anyhow!(e)),
+                }
+            }
+        }
+    }
+
+    /// Writes the memory+swap limit; `0` is what `apply` passes for "not set" and is skipped.
+    fn set_swap(val: i64, cgroup_root: &Path) -> Result<()> {
+        if val == 0 {
+            return Ok(());
+        }
+
+        let path = cgroup_root.join(CGROUP_MEMORY_SWAP_LIMIT);
+
+        Self::set(val, &path)?;
+
+        Ok(())
+    }
+
+    /// Writes both limits exactly once; swap is written first when `is_updated` is true.
+    fn set_memory_and_swap(
+        limit: i64,
+        swap: i64,
+        is_updated: bool,
+        cgroup_root: &Path,
+    ) -> Result<()> {
+        // According to runc we need to change the write sequence of
+        // limit and swap so it won't fail, because the new and old
+        // values don't fit the kernel's validation
+        // see:
+        // https://github.com/opencontainers/runc/blob/3f6594675675d4e88901c782462f56497260b1d2/libcontainer/cgroups/fs/memory.go#L89
+        if is_updated {
+            Self::set_swap(swap, cgroup_root)?;
+            Self::set_memory(limit, cgroup_root)?;
+        } else {
+            Self::set_memory(limit, cgroup_root)?;
+            Self::set_swap(swap, cgroup_root)?;
+        }
+        Ok(())
+    }
+
+    /// Applies `resource.limit` / `resource.swap`, choosing the write order from the current limit.
+    fn apply(resource: &LinuxMemory, cgroup_root: &Path) -> Result<()> {
+        match resource.limit {
+            Some(limit) => {
+                let current_limit = Self::get_memory_limit(cgroup_root)?;
+                match resource.swap {
+                    Some(swap) => {
+                        let is_updated = swap == -1 || current_limit < swap;
+                        Self::set_memory_and_swap(limit, swap, is_updated, cgroup_root)?;
+                    }
+                    None => {
+                        if limit == -1 {
+                            Self::set_memory_and_swap(limit, -1, true, cgroup_root)?;
+                        } else {
+                            let is_updated = current_limit < 0;
+                            Self::set_memory_and_swap(limit, 0, is_updated, cgroup_root)?;
+                        }
+                    }
+                }
+            }
+            None => match resource.swap {
+                Some(swap) => {
+                    Self::set_memory_and_swap(0, swap, false, cgroup_root)?;
+                }
+                None => {
+                    Self::set_memory_and_swap(0, 0, false, cgroup_root)?;
+                }
+            },
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::spec::LinuxMemory;
+
+    fn set_fixture(temp_dir: &std::path::Path, filename: &str, val: &str) -> Result<()> {
+        std::fs::OpenOptions::new()
+            .create(true)
+            .write(true)
+            .truncate(true)
+            .open(temp_dir.join(filename))?
+            .write_all(val.as_bytes())?;
+
+        Ok(())
+    }
+
+    fn create_temp_dir(test_name: &str) -> Result<std::path::PathBuf> {
+        std::fs::create_dir_all(std::env::temp_dir().join(test_name))?;
+        Ok(std::env::temp_dir().join(test_name))
+    }
+
+    #[test]
+    fn test_set_memory() {
+        let limit = 1024;
+        let tmp = create_temp_dir("test_set_memory").expect("create temp directory for test");
+        set_fixture(&tmp, CGROUP_MEMORY_USAGE, "0").expect("Set fixure for memory usage");
+        set_fixture(&tmp, CGROUP_MEMORY_MAX_USAGE, "0").expect("Set fixure for max memory usage");
+        set_fixture(&tmp, CGROUP_MEMORY_LIMIT, "0").expect("Set fixure for memory limit");
+        Memory::set_memory(limit, &tmp).expect("Set memory limit");
+        let content =
+            std::fs::read_to_string(tmp.join(CGROUP_MEMORY_LIMIT)).expect("Read to string");
+        assert_eq!(limit.to_string(), content)
+    }
+
+    #[test]
+    fn test_set_swap() {
+        let limit = 512;
+        let tmp = create_temp_dir("test_set_swap").expect("create temp directory for test");
+        set_fixture(&tmp, CGROUP_MEMORY_SWAP_LIMIT, "0").expect("Set fixure for swap limit");
+        Memory::set_swap(limit, &tmp).expect("Set swap limit");
+        let content =
+            std::fs::read_to_string(tmp.join(CGROUP_MEMORY_SWAP_LIMIT)).expect("Read to string");
+        assert_eq!(limit.to_string(), content)
+    }
+
+    #[test]
+    fn test_set_memory_and_swap() {
+        let tmp =
+            create_temp_dir("test_set_memory_and_swap").expect("create temp directory for test");
+        set_fixture(&tmp, CGROUP_MEMORY_USAGE, "0").expect("Set fixure for memory usage");
+        set_fixture(&tmp, CGROUP_MEMORY_MAX_USAGE, "0").expect("Set fixure for max memory usage");
+        set_fixture(&tmp, CGROUP_MEMORY_LIMIT, "0").expect("Set fixure for memory limit");
+        set_fixture(&tmp, CGROUP_MEMORY_SWAP_LIMIT, "0").expect("Set fixure for swap limit");
+
+        // test unlimited memory with no set swap
+        {
+            let limit = -1;
+            let linux_memory = &LinuxMemory {
+                limit: Some(limit),
+                swap: None, // Some(0) gives the same outcome
+                reservation: None,
+                kernel: None,
+                kernel_tcp: None,
+                swappiness: None,
+            };
+            Memory::apply(linux_memory, &tmp).expect("Set memory and swap");
+
+            let limit_content =
+                std::fs::read_to_string(tmp.join(CGROUP_MEMORY_LIMIT)).expect("Read to string");
+            assert_eq!(limit.to_string(), limit_content);
+
+            let swap_content = std::fs::read_to_string(tmp.join(CGROUP_MEMORY_SWAP_LIMIT))
+                .expect("Read to string");
+            // swap should be set to -1 also
+            assert_eq!(limit.to_string(), swap_content);
+        }
+
+        // test setting swap and memory to arbitrary values
+        {
+            let limit = 1024 * 1024 * 1024;
+            let swap = 1024;
+            let linux_memory = &LinuxMemory {
+                limit: Some(limit),
+                swap: Some(swap),
+                reservation: None,
+                kernel: None,
+                kernel_tcp: None,
+                swappiness: None,
+            };
+            Memory::apply(linux_memory, &tmp).expect("Set memory and swap");
+
+            let limit_content =
+                std::fs::read_to_string(tmp.join(CGROUP_MEMORY_LIMIT)).expect("Read to string");
+            assert_eq!(limit.to_string(), limit_content);
+
+            let swap_content = std::fs::read_to_string(tmp.join(CGROUP_MEMORY_SWAP_LIMIT))
+                .expect("Read to string");
+            assert_eq!(swap.to_string(), swap_content);
+        }
+    }
+}
diff --git a/src/cgroups/mod.rs b/src/cgroups/mod.rs
index f6c2acdbd..ff6635365 100644
--- a/src/cgroups/mod.rs
+++ b/src/cgroups/mod.rs
@@ -3,6 +3,7 @@ mod controller_type;
 mod devices;
 mod hugetlb;
 mod manager;
+mod memory;
 mod pids;
 pub use controller::Controller;
 pub use controller_type::ControllerType;