Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: disable cpuset management for non-root clients #23804

Merged
merged 1 commit into from
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/23804.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
docker: Disable cpuset management for non-root clients
```
4 changes: 4 additions & 0 deletions drivers/docker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,10 @@ type DriverConfig struct {

AllowRuntimesList []string `codec:"allow_runtimes"`
allowRuntimes map[string]struct{} `codec:"-"`

// prevents task handles from writing to cpuset cgroups we don't have
// permissions to; not user configurable
disableCpusetManagement bool `codec:"-"`
}

type AuthConfig struct {
Expand Down
52 changes: 27 additions & 25 deletions drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,18 +253,19 @@ func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
}

h := &taskHandle{
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
logger: d.logger.With("container_id", container.ID),
task: handle.Config,
containerID: container.ID,
containerCgroup: container.HostConfig.Cgroup,
containerImage: container.Image,
doneCh: make(chan bool),
waitCh: make(chan struct{}),
removeContainerOnExit: d.config.GC.Container,
net: handleState.DriverNetwork,
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
logger: d.logger.With("container_id", container.ID),
task: handle.Config,
containerID: container.ID,
containerCgroup: container.HostConfig.Cgroup,
containerImage: container.Image,
doneCh: make(chan bool),
waitCh: make(chan struct{}),
removeContainerOnExit: d.config.GC.Container,
net: handleState.DriverNetwork,
disableCpusetManagement: d.config.disableCpusetManagement,
}

if loggingIsEnabled(d.config, handle.Config) {
Expand Down Expand Up @@ -453,19 +454,20 @@ CREATE:

// Return a driver handle
h := &taskHandle{
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
dlogger: dlogger,
dloggerPluginClient: pluginClient,
logger: d.logger.With("container_id", container.ID),
task: cfg,
containerID: container.ID,
containerImage: container.Image,
doneCh: make(chan bool),
waitCh: make(chan struct{}),
removeContainerOnExit: d.config.GC.Container,
net: net,
dockerClient: dockerClient,
dockerCGroupDriver: dockerInfo.CgroupDriver,
infinityClient: infinityClient,
dlogger: dlogger,
dloggerPluginClient: pluginClient,
logger: d.logger.With("container_id", container.ID),
task: cfg,
containerID: container.ID,
containerImage: container.Image,
doneCh: make(chan bool),
waitCh: make(chan struct{}),
removeContainerOnExit: d.config.GC.Container,
net: net,
disableCpusetManagement: d.config.disableCpusetManagement,
}

if err := handle.SetDriverState(h.buildState()); err != nil {
Expand Down
10 changes: 5 additions & 5 deletions drivers/docker/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ func (d *Driver) buildFingerprint() *drivers.Fingerprint {
HealthDescription: drivers.DriverHealthy,
}

// disable if non-root on linux systems
// warn if non-root on linux systems unless we've intentionally disabled
// cpuset management
if runtime.GOOS == "linux" && !utils.IsUnixRoot() {
fp.Health = drivers.HealthStateUndetected
fp.HealthDescription = drivers.DriverRequiresRootMessage
d.setFingerprintFailure()
return fp
d.config.disableCpusetManagement = true
d.logger.Warn("docker driver requires running as root: resources.cores and NUMA-aware scheduling will not function correctly on this node, including for non-docker tasks")
fp.Attributes["driver.docker.cpuset_management.disabled"] = pstructs.NewBoolAttribute(true)
}

dockerClient, err := d.getDockerClient()
Expand Down
25 changes: 13 additions & 12 deletions drivers/docker/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,18 @@ type taskHandle struct {
// normal dockerClient which includes a default timeout.
infinityClient *docker.Client

logger hclog.Logger
dlogger docklog.DockerLogger
dloggerPluginClient *plugin.Client
task *drivers.TaskConfig
containerID string
containerCgroup string
containerImage string
doneCh chan bool
waitCh chan struct{}
removeContainerOnExit bool
net *drivers.DriverNetwork
logger hclog.Logger
dlogger docklog.DockerLogger
dloggerPluginClient *plugin.Client
task *drivers.TaskConfig
containerID string
containerCgroup string
containerImage string
doneCh chan bool
waitCh chan struct{}
removeContainerOnExit bool
net *drivers.DriverNetwork
disableCpusetManagement bool

exitResult *drivers.ExitResult
exitResultLock sync.Mutex
Expand Down Expand Up @@ -247,7 +248,7 @@ func (h *taskHandle) shutdownLogger() {
}

func (h *taskHandle) startCpusetFixer() {
if cgroupslib.GetMode() == cgroupslib.OFF {
if cgroupslib.GetMode() == cgroupslib.OFF || h.disableCpusetManagement {
return
}

Expand Down
8 changes: 8 additions & 0 deletions website/content/docs/drivers/docker.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,13 @@ user to the `docker` group so you can run Nomad without root:
$ sudo usermod -G docker -a nomad
```

Nomad clients manage a cpuset cgroup for each task to reserve or share CPU
[cores][]. To remain compatible with Docker's own cgroup management, Nomad must
write to cgroups owned by Docker, which requires running as root. If Nomad is
not running as root, the `docker` driver disables its cpuset management and
logs a warning. In that case, CPU isolation and NUMA-aware scheduling will not
function correctly for workloads with `resources.cores`, including workloads
using task drivers other than `docker` on the same host.

For the best performance and security features you should use recent versions
of the Linux Kernel and Docker daemon.

Expand Down Expand Up @@ -1238,3 +1245,4 @@ Windows is relatively new and rapidly evolving you may want to consult the
[runtime_env]: /nomad/docs/runtime/environment#job-related-variables
[`--cap-add`]: https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities
[`--cap-drop`]: https://docs.docker.com/engine/reference/run/#runtime-privilege-and-linux-capabilities
[cores]: /nomad/docs/job-specification/resources#cores