Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cgroupv1 support to available_parallelism #97925

Merged
merged 3 commits into from
Jul 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@
#![feature(hasher_prefixfree_extras)]
#![feature(hashmap_internals)]
#![feature(int_error_internals)]
#![feature(is_some_with)]
#![feature(maybe_uninit_slice)]
#![feature(maybe_uninit_write_slice)]
#![feature(mixed_integer_ops)]
Expand Down
235 changes: 189 additions & 46 deletions library/std/src/sys/unix/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
))] {
#[cfg(any(target_os = "android", target_os = "linux"))]
{
let quota = cgroup2_quota().max(1);
let quota = cgroups::quota().max(1);
let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
unsafe {
if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 {
Expand Down Expand Up @@ -379,49 +379,88 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
}
}

/// Returns cgroup CPU quota in core-equivalents, rounded down, or usize::MAX if the quota cannot
/// be determined or is not set.
#[cfg(any(target_os = "android", target_os = "linux"))]
fn cgroup2_quota() -> usize {
mod cgroups {
//! Currently not covered
//! * cgroup v2 in non-standard mountpoints
//! * paths containing control characters or spaces, since those would be escaped in procfs
//! output and we don't unescape
use crate::borrow::Cow;
use crate::ffi::OsString;
use crate::fs::{try_exists, File};
use crate::io::Read;
use crate::io::{BufRead, BufReader};
use crate::os::unix::ffi::OsStringExt;
use crate::path::Path;
use crate::path::PathBuf;
use crate::str::from_utf8;

let mut quota = usize::MAX;
if cfg!(miri) {
// Attempting to open a file fails under default flags due to isolation.
// And Miri does not have parallelism anyway.
return quota;
}

let _: Option<()> = try {
let mut buf = Vec::with_capacity(128);
// find our place in the cgroup hierarchy
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
let cgroup_path = buf
.split(|&c| c == b'\n')
.filter_map(|line| {
let mut fields = line.splitn(3, |&c| c == b':');
// expect cgroupv2 which has an empty 2nd field
if fields.nth(1) != Some(b"") {
return None;
}
let path = fields.last()?;
// skip leading slash
Some(path[1..].to_owned())
})
.next()?;
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
#[derive(PartialEq)]
enum Cgroup {
V1,
V2,
}

/// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
/// be determined or is not set.
pub(super) fn quota() -> usize {
let mut quota = usize::MAX;
if cfg!(miri) {
// Attempting to open a file fails under default flags due to isolation.
// And Miri does not have parallelism anyway.
return quota;
}

let _: Option<()> = try {
let mut buf = Vec::with_capacity(128);
// find our place in the cgroup hierarchy
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
let (cgroup_path, version) =
buf.split(|&c| c == b'\n').fold(None, |previous, line| {
let mut fields = line.splitn(3, |&c| c == b':');
// 2nd field is a list of controllers for v1 or empty for v2
let version = match fields.nth(1) {
Some(b"") => Cgroup::V2,
Some(controllers)
if from_utf8(controllers)
.is_ok_and(|c| c.split(",").any(|c| c == "cpu")) =>
{
Cgroup::V1
}
_ => return previous,
};

// already-found v1 trumps v2 since it explicitly specifies its controllers
if previous.is_some() && version == Cgroup::V2 {
return previous;
}

let path = fields.last()?;
// skip leading slash
Some((path[1..].to_owned(), version))
})?;
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));

quota = match version {
Cgroup::V1 => quota_v1(cgroup_path),
Cgroup::V2 => quota_v2(cgroup_path),
};
};

quota
}

fn quota_v2(group_path: PathBuf) -> usize {
let mut quota = usize::MAX;

let mut path = PathBuf::with_capacity(128);
let mut read_buf = String::with_capacity(20);

// standard mount location defined in file-hierarchy(7) manpage
let cgroup_mount = "/sys/fs/cgroup";

path.push(cgroup_mount);
path.push(&cgroup_path);
path.push(&group_path);

path.push("cgroup.controllers");

Expand All @@ -432,30 +471,134 @@ fn cgroup2_quota() -> usize {

path.pop();

while path.starts_with(cgroup_mount) {
path.push("cpu.max");
let _: Option<()> = try {
while path.starts_with(cgroup_mount) {
path.push("cpu.max");

read_buf.clear();

if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
let raw_quota = read_buf.lines().next()?;
let mut raw_quota = raw_quota.split(' ');
let limit = raw_quota.next()?;
let period = raw_quota.next()?;
match (limit.parse::<usize>(), period.parse::<usize>()) {
(Ok(limit), Ok(period)) => {
quota = quota.min(limit / period);
}
_ => {}
}
}

read_buf.clear();
path.pop(); // pop filename
path.pop(); // pop dir
}
};

if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
let raw_quota = read_buf.lines().next()?;
let mut raw_quota = raw_quota.split(' ');
let limit = raw_quota.next()?;
let period = raw_quota.next()?;
match (limit.parse::<usize>(), period.parse::<usize>()) {
(Ok(limit), Ok(period)) => {
quota = quota.min(limit / period);
}
quota
}

fn quota_v1(group_path: PathBuf) -> usize {
let mut quota = usize::MAX;
let mut path = PathBuf::with_capacity(128);
let mut read_buf = String::with_capacity(20);

// Hardcode commonly used locations mentioned in the cgroups(7) manpage
// if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
// this can be expensive on systems with tons of mountpoints
// but we only get to this point when /proc/self/cgroups explicitly indicated
// this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
find_mountpoint,
];

for mount in mounts {
let Some((mount, group_path)) = mount(&group_path) else { continue };

path.clear();
path.push(mount.as_ref());
path.push(&group_path);

// skip if we guessed the mount incorrectly
if matches!(try_exists(&path), Err(_) | Ok(false)) {
continue;
}

while path.starts_with(mount.as_ref()) {
let mut parse_file = |name| {
path.push(name);
read_buf.clear();

let f = File::open(&path);
path.pop(); // restore buffer before any early returns
f.ok()?.read_to_string(&mut read_buf).ok()?;
let parsed = read_buf.trim().parse::<usize>().ok()?;

Some(parsed)
};

let limit = parse_file("cpu.cfs_quota_us");
let period = parse_file("cpu.cfs_period_us");

match (limit, period) {
(Some(limit), Some(period)) => quota = quota.min(limit / period),
_ => {}
}

path.pop();
}

path.pop(); // pop filename
path.pop(); // pop dir
// we passed the try_exists above so we should have traversed the correct hierarchy
// when reaching this line
break;
}
};

quota
quota
}

/// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
///
/// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
/// over the already-included prefix
fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
let mut reader = BufReader::new(File::open("/proc/self/mountinfo").ok()?);
let mut line = String::with_capacity(256);
loop {
line.clear();
if reader.read_line(&mut line).ok()? == 0 {
break;
}

let line = line.trim();
let mut items = line.split(' ');

let sub_path = items.nth(3)?;
let mount_point = items.next()?;
let mount_opts = items.next_back()?;
let filesystem_type = items.nth_back(1)?;

if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
// not a cgroup / not a cpu-controller
continue;
}

let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;

if !group_path.starts_with(sub_path) {
// this is a bind-mount and the bound subdirectory
// does not contain the cgroup this process belongs to
continue;
}

let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;

return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
}

None
}
}

#[cfg(all(
Expand Down
7 changes: 6 additions & 1 deletion library/std/src/thread/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1571,10 +1571,15 @@ fn _assert_sync_and_send() {
///
/// On Linux:
/// - It may overcount the amount of parallelism available when limited by a
/// process-wide affinity mask or cgroup quotas and cgroup2 fs or `sched_getaffinity()` can't be
/// process-wide affinity mask or cgroup quotas and `sched_getaffinity()` or cgroup fs can't be
/// queried, e.g. due to sandboxing.
/// - It may undercount the amount of parallelism if the current thread's affinity mask
/// does not reflect the process' cpuset, e.g. due to pinned threads.
the8472 marked this conversation as resolved.
Show resolved Hide resolved
/// - If the process is in a cgroup v1 cpu controller, this may need to
/// scan mountpoints to find the corresponding cgroup v1 controller,
/// which may take time on systems with large numbers of mountpoints.
/// (This does not apply to cgroup v2, or to processes not in a
/// cgroup.)
///
/// On all targets:
/// - It may overcount the amount of parallelism available when running in a VM
Expand Down