From 7d0acd4c7b4dfaca89ea6044639b566d8a989e5d Mon Sep 17 00:00:00 2001 From: Mohamad Arab Date: Thu, 18 Oct 2018 20:00:01 +0200 Subject: [PATCH] Make CGroups CPU period configurable Signed-off-by: Mohamad Arab --- README.md | 1 + agent/api/task/task.go | 2 +- agent/api/task/task_linux.go | 12 ++++----- agent/api/task/task_linux_test.go | 16 +++++------ agent/api/task/task_unsupported.go | 3 ++- agent/api/task/task_windows.go | 3 ++- agent/config/config.go | 6 +++++ agent/config/config_test.go | 3 +++ agent/config/config_unix.go | 1 + agent/config/config_unix_test.go | 43 ++++++++++++++++++++++++++++++ agent/config/parse.go | 13 +++++++++ agent/config/types.go | 3 +++ 12 files changed, 89 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index ce97645ada1..27ddc115d30 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,7 @@ additional details on each available environment variable. | `ECS_HOST_DATA_DIR` | `/var/lib/ecs` | The source directory on the host from which ECS_DATADIR is mounted. We use this to determine the source mount path for container metadata files in the case the ECS Agent is running as a container. We do not use this value in Windows because the ECS Agent is not running as container in Windows. | `/var/lib/ecs` | `Not used` | | `ECS_ENABLE_TASK_CPU_MEM_LIMIT` | `true` | Whether to enable task-level cpu and memory limits | `true` | `false` | | `ECS_CGROUP_PATH` | `/sys/fs/cgroup` | The root cgroup path that is expected by the ECS agent. This is the path that accessible from the agent mount. | `/sys/fs/cgroup` | Not applicable | +| `ECS_CGROUP_CPU_PERIOD` | `10ms` | CGroups CPU period for task level limits. This value should be between 8ms to 100ms | `100ms` | Not applicable | | `ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_WORKAROUND` | `true` | When `true`, ECS will allow CPU unbounded(CPU=`0`) tasks to run along with CPU bounded tasks in Windows. | Not applicable | `false` | | `ECS_TASK_METADATA_RPS_LIMIT` | `100,150` | Comma separated integer values for steady state and burst throttle limits for task metadata endpoint | `40,60` | `40,60` | | `ECS_SHARED_VOLUME_MATCH_FULL_CONFIG` | `true` | When `true`, ECS Agent will compare name, driver options, and labels to make sure volumes are identical. When `false`, Agent will short circuit shared volume comparison if the names match. This is the default Docker behavior. If a volume is shared across instances, this should be set to `false`. | `false` | `false`| diff --git a/agent/api/task/task.go b/agent/api/task/task.go index a3be5698ea0..82f7f909445 100644 --- a/agent/api/task/task.go +++ b/agent/api/task/task.go @@ -250,7 +250,7 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config, // hook into this task.adjustForPlatform(cfg) if task.MemoryCPULimitsEnabled { - err := task.initializeCgroupResourceSpec(cfg.CgroupPath, resourceFields) + err := task.initializeCgroupResourceSpec(cfg.CgroupPath, cfg.CgroupCPUPeriod, resourceFields) if err != nil { seelog.Errorf("Task [%s]: could not intialize resource: %v", task.Arn, err) return apierrors.NewResourceInitError(task.Arn, err) diff --git a/agent/api/task/task_linux.go b/agent/api/task/task_linux.go index 25794c2291a..ee76fb22a8b 100644 --- a/agent/api/task/task_linux.go +++ b/agent/api/task/task_linux.go @@ -51,12 +51,12 @@ func (task *Task) adjustForPlatform(cfg *config.Config) { task.MemoryCPULimitsEnabled = cfg.TaskCPUMemLimit.Enabled() } -func (task *Task) initializeCgroupResourceSpec(cgroupPath string, resourceFields *taskresource.ResourceFields) error { +func (task *Task) initializeCgroupResourceSpec(cgroupPath string, cGroupCPUPeriod time.Duration, resourceFields *taskresource.ResourceFields) error { cgroupRoot, err := task.BuildCgroupRoot() if err != nil { return errors.Wrapf(err, "cgroup resource: unable to determine cgroup root for task") } - resSpec, err := task.BuildLinuxResourceSpec() + resSpec, err := task.BuildLinuxResourceSpec(cGroupCPUPeriod) if err != nil { return errors.Wrapf(err, "cgroup resource: unable to build resource spec for task") } @@ -85,13 +85,13 @@ func (task *Task) BuildCgroupRoot() (string, error) { } // BuildLinuxResourceSpec returns a linuxResources object for the task cgroup -func (task *Task) BuildLinuxResourceSpec() (specs.LinuxResources, error) { +func (task *Task) BuildLinuxResourceSpec(cGroupCPUPeriod time.Duration) (specs.LinuxResources, error) { linuxResourceSpec := specs.LinuxResources{} // If task level CPU limits are requested, set CPU quota + CPU period // Else set CPU shares if task.CPU > 0 { - linuxCPUSpec, err := task.buildExplicitLinuxCPUSpec() + linuxCPUSpec, err := task.buildExplicitLinuxCPUSpec(cGroupCPUPeriod) if err != nil { return specs.LinuxResources{}, err } @@ -116,13 +116,13 @@ func (task *Task) BuildLinuxResourceSpec() (specs.LinuxResources, error) { // buildExplicitLinuxCPUSpec builds CPU spec when task CPU limits are // explicitly requested -func (task *Task) buildExplicitLinuxCPUSpec() (specs.LinuxCPU, error) { +func (task *Task) buildExplicitLinuxCPUSpec(cGroupCPUPeriod time.Duration) (specs.LinuxCPU, error) { if task.CPU > maxTaskVCPULimit { return specs.LinuxCPU{}, errors.Errorf("task CPU spec builder: unsupported CPU limits, requested=%f, max-supported=%d", task.CPU, maxTaskVCPULimit) } - taskCPUPeriod := uint64(defaultCPUPeriod / time.Microsecond) + taskCPUPeriod := uint64(cGroupCPUPeriod / time.Microsecond) taskCPUQuota := int64(task.CPU * float64(taskCPUPeriod)) // TODO: DefaultCPUPeriod only permits 10VCPUs. diff --git a/agent/api/task/task_linux_test.go b/agent/api/task/task_linux_test.go index 46fbacd5f39..82a1acb1939 100644 --- a/agent/api/task/task_linux_test.go +++ b/agent/api/task/task_linux_test.go @@ -250,7 +250,7 @@ func TestBuildLinuxResourceSpecCPUMem(t *testing.T) { }, } - linuxResourceSpec, err := task.BuildLinuxResourceSpec() + linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod) assert.NoError(t, err) assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec) @@ -272,7 +272,7 @@ func TestBuildLinuxResourceSpecCPU(t *testing.T) { }, } - linuxResourceSpec, err := task.BuildLinuxResourceSpec() + linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod) assert.NoError(t, err) assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec) @@ -295,7 +295,7 @@ func TestBuildLinuxResourceSpecWithoutTaskCPULimits(t *testing.T) { }, } - linuxResourceSpec, err := task.BuildLinuxResourceSpec() + linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod) assert.NoError(t, err) assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec) @@ -319,7 +319,7 @@ func TestBuildLinuxResourceSpecWithoutTaskCPUWithContainerCPULimits(t *testing.T }, } - linuxResourceSpec, err := task.BuildLinuxResourceSpec() + linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod) assert.NoError(t, err) assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec) @@ -342,7 +342,7 @@ func TestBuildLinuxResourceSpecInvalidMem(t *testing.T) { } expectedLinuxResourceSpec := specs.LinuxResources{} - linuxResourceSpec, err := task.BuildLinuxResourceSpec() + linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod) assert.Error(t, err) assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec) @@ -488,7 +488,7 @@ func TestInitCgroupResourceSpecHappyPath(t *testing.T) { defer ctrl.Finish() mockControl := mock_control.NewMockControl(ctrl) mockIO := mock_ioutilwrapper.NewMockIOUtil(ctrl) - assert.NoError(t, task.initializeCgroupResourceSpec("cgroupPath", &taskresource.ResourceFields{ + assert.NoError(t, task.initializeCgroupResourceSpec("cgroupPath", defaultCPUPeriod, &taskresource.ResourceFields{ Control: mockControl, ResourceFieldsCommon: &taskresource.ResourceFieldsCommon{ IOUtil: mockIO, @@ -512,7 +512,7 @@ func TestInitCgroupResourceSpecInvalidARN(t *testing.T) { MemoryCPULimitsEnabled: true, ResourcesMapUnsafe: make(map[string][]taskresource.TaskResource), } - assert.Error(t, task.initializeCgroupResourceSpec("", nil)) + assert.Error(t, task.initializeCgroupResourceSpec("", time.Millisecond, nil)) assert.Equal(t, 0, len(task.GetResources())) assert.Equal(t, 0, len(task.Containers[0].TransitionDependenciesMap)) } @@ -533,7 +533,7 @@ func TestInitCgroupResourceSpecInvalidMem(t *testing.T) { MemoryCPULimitsEnabled: true, ResourcesMapUnsafe: make(map[string][]taskresource.TaskResource), } - assert.Error(t, task.initializeCgroupResourceSpec("", nil)) + assert.Error(t, task.initializeCgroupResourceSpec("", time.Millisecond, nil)) assert.Equal(t, 0, len(task.GetResources())) assert.Equal(t, 0, len(task.Containers[0].TransitionDependenciesMap)) } diff --git a/agent/api/task/task_unsupported.go b/agent/api/task/task_unsupported.go index de006332dbc..5c3a642dede 100644 --- a/agent/api/task/task_unsupported.go +++ b/agent/api/task/task_unsupported.go @@ -26,6 +26,7 @@ import ( const ( defaultCPUPeriod = 100 * time.Millisecond // 100ms + // With a 100ms CPU period, we can express 0.01 vCPU to 10 vCPUs maxTaskVCPULimit = 10 // Reference: http://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html @@ -46,7 +47,7 @@ func (task *Task) adjustForPlatform(cfg *config.Config) { func getCanonicalPath(path string) string { return path } -func (task *Task) initializeCgroupResourceSpec(cgroupPath string, resourceFields *taskresource.ResourceFields) error { +func (task *Task) initializeCgroupResourceSpec(cgroupPath string, cGroupCPUPeriod time.Duration, resourceFields *taskresource.ResourceFields) error { return nil } diff --git a/agent/api/task/task_windows.go b/agent/api/task/task_windows.go index 0c0a847c694..4dc152bf045 100644 --- a/agent/api/task/task_windows.go +++ b/agent/api/task/task_windows.go @@ -20,6 +20,7 @@ import ( "path/filepath" "runtime" "strings" + "time" "github.com/aws/amazon-ecs-agent/agent/config" "github.com/aws/amazon-ecs-agent/agent/taskresource" @@ -120,6 +121,6 @@ func (task *Task) dockerCPUShares(containerCPU uint) int64 { return int64(containerCPU) } -func (task *Task) initializeCgroupResourceSpec(cgroupPath string, resourceFields *taskresource.ResourceFields) error { +func (task *Task) initializeCgroupResourceSpec(cgroupPath string, cGroupCPUPeriod time.Duration, resourceFields *taskresource.ResourceFields) error { return errors.New("unsupported platform") } diff --git a/agent/config/config.go b/agent/config/config.go index b1b3033dda6..eb44e2aabf9 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -133,6 +133,11 @@ const ( // DefaultNvidiaRuntime is the name of the runtime to pass Nvidia GPUs to containers DefaultNvidiaRuntime = "nvidia" + + // defaultCgroupCPUPeriod is set to 100 ms to set isCFS period and quota for task limits + defaultCgroupCPUPeriod = 100 * time.Millisecond + maximumCgroupCPUPeriod = 100 * time.Millisecond + minimumCgroupCPUPeriod = 8 * time.Millisecond ) const ( @@ -539,6 +544,7 @@ func environmentConfig() (Config, error) { GPUSupportEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_GPU_SUPPORT"), false), NvidiaRuntime: os.Getenv("ECS_NVIDIA_RUNTIME"), TaskMetadataAZDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_TASK_METADATA_AZ"), false), + CgroupCPUPeriod: parseCgroupCPUPeriod(), }, err } diff --git a/agent/config/config_test.go b/agent/config/config_test.go index 8159de4319a..b73988de7f9 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -117,10 +117,12 @@ func TestEnvironmentConfig(t *testing.T) { defer setTestEnv("ECS_NVIDIA_RUNTIME", "nvidia")() defer setTestEnv("ECS_POLL_METRICS", "true")() defer setTestEnv("ECS_POLLING_METRICS_WAIT_DURATION", "10s")() + defer setTestEnv("ECS_CGROUP_CPU_PERIOD", "") additionalLocalRoutesJSON := `["1.2.3.4/22","5.6.7.8/32"]` setTestEnv("ECS_AWSVPC_ADDITIONAL_LOCAL_ROUTES", additionalLocalRoutesJSON) setTestEnv("ECS_ENABLE_CONTAINER_METADATA", "true") setTestEnv("ECS_HOST_DATA_DIR", "/etc/ecs/") + setTestEnv("ECS_CGROUP_CPU_PERIOD", "10ms") conf, err := environmentConfig() assert.NoError(t, err) @@ -163,6 +165,7 @@ func TestEnvironmentConfig(t *testing.T) { assert.True(t, conf.GPUSupportEnabled, "Wrong value for GPUSupportEnabled") assert.Equal(t, "nvidia", conf.NvidiaRuntime) assert.True(t, conf.TaskMetadataAZDisabled, "Wrong value for TaskMetadataAZDisabled") + assert.Equal(t, 10*time.Millisecond, conf.CgroupCPUPeriod) } func TestTrimWhitespaceWhenCreating(t *testing.T) { diff --git a/agent/config/config_unix.go b/agent/config/config_unix.go index aefdeee088e..a6756bdf42c 100644 --- a/agent/config/config_unix.go +++ b/agent/config/config_unix.go @@ -80,6 +80,7 @@ func DefaultConfig() Config { PollMetrics: false, PollingMetricsWaitDuration: DefaultPollingMetricsWaitDuration, NvidiaRuntime: DefaultNvidiaRuntime, + CgroupCPUPeriod: defaultCgroupCPUPeriod, } } diff --git a/agent/config/config_unix_test.go b/agent/config/config_unix_test.go index 8a453068ffa..81b9e303313 100644 --- a/agent/config/config_unix_test.go +++ b/agent/config/config_unix_test.go @@ -65,6 +65,7 @@ func TestConfigDefault(t *testing.T) { assert.Equal(t, DefaultTaskMetadataBurstRate, cfg.TaskMetadataBurstRate, "Default TaskMetadataBurstRate is set incorrectly") assert.False(t, cfg.SharedVolumeMatchFullConfig, "Default SharedVolumeMatchFullConfig set incorrectly") + assert.Equal(t, defaultCgroupCPUPeriod, cfg.CgroupCPUPeriod, "CFS cpu period set incorrectly") } // TestConfigFromFile tests the configuration can be read from file @@ -209,3 +210,45 @@ func TestEmptyNvidiaRuntime(t *testing.T) { assert.NoError(t, err) assert.Equal(t, DefaultNvidiaRuntime, cfg.NvidiaRuntime, "Wrong value for NvidiaRuntime") } + +func TestCPUPeriodSettings(t *testing.T) { + cases := []struct { + Name string + Env string + Response time.Duration + }{ + { + Name: "OverrideDefaultCPUPeriod", + Env: "10ms", + Response: 10 * time.Millisecond, + }, + { + Name: "DefaultCPUPeriod", + Env: "", + Response: defaultCgroupCPUPeriod, + }, + { + Name: "TestCPUPeriodUpperBoundLimit", + Env: "110ms", + Response: defaultCgroupCPUPeriod, + }, + { + Name: "TestCPUPeriodLowerBoundLimit", + Env: "7ms", + Response: defaultCgroupCPUPeriod, + }, + } + + for _, c := range cases { + t.Run(c.Name, func(t *testing.T) { + defer setTestRegion()() + defer os.Setenv("ECS_CGROUP_CPU_PERIOD", "100ms") + + os.Setenv("ECS_CGROUP_CPU_PERIOD", c.Env) + conf, err := NewConfig(ec2.NewBlackholeEC2MetadataClient()) + + assert.NoError(t, err) + assert.Equal(t, c.Response, conf.CgroupCPUPeriod, "Wrong value for CgroupCPUPeriod") + }) + } +} diff --git a/agent/config/parse.go b/agent/config/parse.go index 0aa202d54f6..7d9e04cd392 100644 --- a/agent/config/parse.go +++ b/agent/config/parse.go @@ -307,3 +307,16 @@ func parseImageCleanupExclusionList(envVar string) []string { } return imageCleanupExclusionList } + +func parseCgroupCPUPeriod() time.Duration { + duration := parseEnvVariableDuration("ECS_CGROUP_CPU_PERIOD") + + if duration >= minimumCgroupCPUPeriod && duration <= maximumCgroupCPUPeriod { + return duration + } else if duration != 0 { + seelog.Warnf("CPU Period duration value: %v for Environment Variable ECS_CGROUP_CPU_PERIOD is not within [%v, %v], using default value instead", + duration, minimumCgroupCPUPeriod, maximumCgroupCPUPeriod) + } + + return defaultCgroupCPUPeriod +} diff --git a/agent/config/types.go b/agent/config/types.go index 426127b7e7f..731c69cfab0 100644 --- a/agent/config/types.go +++ b/agent/config/types.go @@ -283,4 +283,7 @@ type Config struct { // TaskMetadataAZDisabled specifies if availability zone should be disabled in Task Metadata endpoint TaskMetadataAZDisabled bool + + // CgroupCPUPeriod is config option to set different CFS quota and period values in microsecond, defaults to 100 ms + CgroupCPUPeriod time.Duration }