From bd745fa3e5d6e23c2e07b20c9310a6fb21902dbe Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Wed, 11 Nov 2020 16:20:34 -0500 Subject: [PATCH] raw_exec: don't use cgroups when no_cgroup is set (#9328) When raw_exec is configured with [`no_cgroups`](https://www.nomadproject.io/docs/drivers/raw_exec#no_cgroups), raw_exec shouldn't attempt to create a cgroup. Prior to this change, we accidentally always required freezer cgroup to do stats PID tracking. We already have the proper fallback in place for metrics, so only need to ensure that we don't create a cgroup for the task. Fixes https://github.com/hashicorp/nomad/issues/8565 --- CHANGELOG.md | 1 + drivers/rawexec/driver_unix_test.go | 59 +++++++++++++++++++ drivers/shared/executor/executor.go | 6 +- .../shared/executor/executor_linux_test.go | 43 ++++++++++++++ 4 files changed, 107 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 031d1b2b6b9..8148563523f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ BUG FIXES: * csi: Fixed a bug where multi-writer volumes were allowed only 1 write claim. [[GH-9040](https://github.com/hashicorp/nomad/issues/9040)] * csi: Fixed a bug where `nomad volume detach` would not accept prefixes for the node ID parameter. [[GH-9041](https://github.com/hashicorp/nomad/issues/9041)] * driver/docker: Fixed a bug where the default `image_delay` configuration was ignored if the `gc` configuration was not set. [[GH-9101](https://github.com/hashicorp/nomad/issues/9101)] + * driver/rawexec: Fixed a bug where raw_exec attempts to create a freezer cgroups for the tasks even when `no_cgroups` is set. [[GH-9328](https://github.com/hashicorp/nomad/issues/9328)] ## 0.12.8 (November 10, 2020) diff --git a/drivers/rawexec/driver_unix_test.go b/drivers/rawexec/driver_unix_test.go index 77bb7c4acc7..5b72a274a87 100644 --- a/drivers/rawexec/driver_unix_test.go +++ b/drivers/rawexec/driver_unix_test.go @@ -338,3 +338,62 @@ func TestRawExec_ExecTaskStreaming(t *testing.T) { dtestutil.ExecTaskStreamingConformanceTests(t, harness, task.ID) } + +func TestRawExecDriver_NoCgroup(t *testing.T) { + t.Parallel() + if runtime.GOOS != "linux" { + t.Skip("Linux only test") + } + + expectedBytes, err := ioutil.ReadFile("/proc/self/cgroup") + require.NoError(t, err) + expected := strings.TrimSpace(string(expectedBytes)) + + d := newEnabledRawExecDriver(t) + d.config.NoCgroups = true + harness := dtestutil.NewDriverHarness(t, d) + + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "nocgroup", + } + + cleanup := harness.MkAllocDir(task, true) + defer cleanup() + + tc := &TaskConfig{ + Command: "/bin/cat", + Args: []string{"/proc/self/cgroup"}, + } + require.NoError(t, task.EncodeConcreteDriverConfig(&tc)) + testtask.SetTaskConfigEnv(task) + + _, _, err = harness.StartTask(task) + require.NoError(t, err) + + // Task should terminate quickly + waitCh, err := harness.WaitTask(context.Background(), task.ID) + require.NoError(t, err) + select { + case res := <-waitCh: + require.True(t, res.Successful()) + require.Zero(t, res.ExitCode) + case <-time.After(time.Duration(testutil.TestMultiplier()*6) * time.Second): + require.Fail(t, "WaitTask timeout") + } + + // Check the log file to see it exited because of the signal + outputFile := filepath.Join(task.TaskDir().LogDir, "nocgroup.stdout.0") + testutil.WaitForResult(func() (bool, error) { + act, err := ioutil.ReadFile(outputFile) + if err != nil { + return false, fmt.Errorf("Couldn't read expected output: %v", err) + } + + if strings.TrimSpace(string(act)) != expected { + t.Logf("Read from %v", outputFile) + return false, fmt.Errorf("Command outputted\n%v; want\n%v", string(act), expected) + } + return true, nil + }, func(err error) { require.NoError(t, err) }) +} diff --git a/drivers/shared/executor/executor.go b/drivers/shared/executor/executor.go index 15a41bbce9c..370a0cea13e 100644 --- a/drivers/shared/executor/executor.go +++ b/drivers/shared/executor/executor.go @@ -280,8 +280,10 @@ func (e *UniversalExecutor) Launch(command *ExecCommand) (*ProcessState, error) } // Setup cgroups on linux - if err := e.configureResourceContainer(os.Getpid()); err != nil { - return nil, err + if e.commandCfg.ResourceLimits || e.commandCfg.BasicProcessCgroup { + if err := e.configureResourceContainer(os.Getpid()); err != nil { + return nil, err + } } stdout, err := e.commandCfg.Stdout() diff --git a/drivers/shared/executor/executor_linux_test.go b/drivers/shared/executor/executor_linux_test.go index 11e038825af..223ccc515d7 100644 --- a/drivers/shared/executor/executor_linux_test.go +++ b/drivers/shared/executor/executor_linux_test.go @@ -571,3 +571,46 @@ func TestExecutor_cmdMounts(t *testing.T) { require.EqualValues(t, expected, cmdMounts(input)) } + +// TestUniversalExecutor_NoCgroup asserts that commands are executed in the +// same cgroup as parent process +func TestUniversalExecutor_NoCgroup(t *testing.T) { + t.Parallel() + testutil.ExecCompatible(t) + + expectedBytes, err := ioutil.ReadFile("/proc/self/cgroup") + require.NoError(t, err) + + expected := strings.TrimSpace(string(expectedBytes)) + + testExecCmd := testExecutorCommand(t) + execCmd, allocDir := testExecCmd.command, testExecCmd.allocDir + execCmd.Cmd = "/bin/cat" + execCmd.Args = []string{"/proc/self/cgroup"} + defer allocDir.Destroy() + + execCmd.BasicProcessCgroup = false + execCmd.ResourceLimits = false + + executor := NewExecutor(testlog.HCLogger(t)) + defer executor.Shutdown("SIGKILL", 0) + + _, err = executor.Launch(execCmd) + require.NoError(t, err) + + _, err = executor.Wait(context.Background()) + require.NoError(t, err) + + tu.WaitForResult(func() (bool, error) { + act := strings.TrimSpace(string(testExecCmd.stdout.String())) + if expected != act { + return false, fmt.Errorf("expected:\n%s actual:\n%s", expected, act) + } + return true, nil + }, func(err error) { + stderr := strings.TrimSpace(string(testExecCmd.stderr.String())) + t.Logf("stderr: %v", stderr) + require.NoError(t, err) + }) + +}