From 87f184a057421464f0890f86322acfd272608e2d Mon Sep 17 00:00:00 2001
From: Oliver Beattie
Date: Wed, 9 Aug 2017 22:57:53 +0100
Subject: [PATCH] [PLAT-713] Allow the CFS period to be tuned for containers

---
 pkg/kubelet/cm/helpers_linux.go          |  20 +--
 pkg/kubelet/cm/helpers_linux_test.go     | 137 +++++++++++++-----
 pkg/kubelet/kuberuntime/helpers.go       |  15 +-
 .../kuberuntime/kuberuntime_container.go |   6 +-
 staging/src/k8s.io/api/core/v1/types.go  |   7 +
 5 files changed, 134 insertions(+), 51 deletions(-)

diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
index 39f445a943134..08200a0e48d32 100644
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -37,12 +37,13 @@ const (
   MilliCPUToCPU = 1000
 
   // 100000 is equivalent to 100ms
-  QuotaPeriod    = 100000
-  MinQuotaPeriod = 1000
+  DefaultQuotaPeriod int64 = 100000
+  MinQuotaPeriod     int64 = 1000
 )
 
-// MilliCPUToQuota converts milliCPU to CFS quota and period values.
-func MilliCPUToQuota(milliCPU int64) (quota int64, period int64) {
+// MilliCPUToQuota takes milliCPU (along with a CFS period, in usec) and returns
+// a CFS quota value
+func MilliCPUToQuota(milliCPU, period int64) (quota int64) {
   // CFS quota is measured in two values:
   //  - cfs_period_us=100ms (the amount of time to measure usage across)
   //  - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
@@ -53,11 +54,8 @@ func MilliCPUToQuota(milliCPU, period int64) (quota int64) {
     return
   }
 
-  // we set the period to 100ms by default
-  period = QuotaPeriod
-
   // we then convert your milliCPU to a value normalized over a period
-  quota = (milliCPU * QuotaPeriod) / MilliCPUToCPU
+  quota = (milliCPU * period) / MilliCPUToCPU
 
   // quota needs to be a minimum of 1ms.
   if quota < MinQuotaPeriod {
@@ -90,6 +88,7 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
   cpuRequests := int64(0)
   cpuLimits := int64(0)
+  cpuPeriod := DefaultQuotaPeriod
   memoryLimits := int64(0)
   if request, found := reqs[v1.ResourceCPU]; found {
     cpuRequests = request.MilliValue()
@@ -97,13 +96,16 @@ func ResourceConfigForPod(pod *v1.Pod) *ResourceConfig {
   if limit, found := limits[v1.ResourceCPU]; found {
     cpuLimits = limit.MilliValue()
   }
+  if limit, found := limits[v1.ResourceCPUPeriodUsec]; found {
+    cpuPeriod = limit.Value()
+  }
   if limit, found := limits[v1.ResourceMemory]; found {
     memoryLimits = limit.Value()
   }
 
   // convert to CFS values
   cpuShares := MilliCPUToShares(cpuRequests)
-  cpuQuota, cpuPeriod := MilliCPUToQuota(cpuLimits)
+  cpuQuota := MilliCPUToQuota(cpuLimits, cpuPeriod)
 
   // track if limits were applied for each resource.
   memoryLimitsDeclared := true
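The hunks above are the heart of the patch: the CFS quota written for a container is now derived from a caller-supplied period instead of a hard-coded 100ms, with the period read from the new v1.ResourceCPUPeriodUsec limit when present. A standalone sketch of that arithmetic, purely for illustration (constant names mirror the patched helpers; this is not the kubelet code itself):

package main

import "fmt"

const (
	milliCPUToCPU  = 1000 // millicores per whole CPU
	minQuotaPeriod = 1000 // quota is floored at 1ms (1000us)
)

// quotaForLimit mirrors the patched MilliCPUToQuota: the quota is the CPU
// limit (in millicores) scaled to the supplied CFS period (in microseconds).
func quotaForLimit(milliCPU, period int64) int64 {
	if milliCPU == 0 {
		return 0
	}
	quota := (milliCPU * period) / milliCPUToCPU
	if quota < minQuotaPeriod {
		quota = minQuotaPeriod
	}
	return quota
}

func main() {
	// 200m with the default 100ms period: 20ms of CPU time per 100ms window.
	fmt.Println(quotaForLimit(200, 100000)) // 20000
	// The same 200m limit with a 10ms period: 2ms per 10ms window. The average
	// throughput is unchanged, but a throttled thread waits at most ~8ms rather
	// than ~80ms for the next period to begin.
	fmt.Println(quotaForLimit(200, 10000)) // 2000
}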
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
index eebded160da48..1f1033438986b 100644
--- a/pkg/kubelet/cm/helpers_linux_test.go
+++ b/pkg/kubelet/cm/helpers_linux_test.go
@@ -53,11 +53,14 @@ func TestResourceConfigForPod(t *testing.T) {
   memoryQuantity := resource.MustParse("200Mi")
   burstableMemory := memoryQuantity.Value()
   burstablePartialShares := MilliCPUToShares(200)
-  burstableQuota, burstablePeriod := MilliCPUToQuota(200)
+  burstablePeriod := DefaultQuotaPeriod
+  burstableQuota := MilliCPUToQuota(200, burstablePeriod)
   guaranteedShares := MilliCPUToShares(100)
-  guaranteedQuota, guaranteedPeriod := MilliCPUToQuota(100)
+  guaranteedPeriod := int64(10000)
+  guaranteedQuota := MilliCPUToQuota(100, guaranteedPeriod)
   memoryQuantity = resource.MustParse("100Mi")
   guaranteedMemory := memoryQuantity.Value()
+
   testCases := map[string]struct {
     pod      *v1.Pod
     expected *ResourceConfig
@@ -67,19 +70,30 @@ func TestResourceConfigForPod(t *testing.T) {
   }{
     "besteffort": {
       pod: &v1.Pod{
         Spec: v1.PodSpec{
           Containers: []v1.Container{
             {
-              Resources: getResourceRequirements(getResourceList("", ""), getResourceList("", "")),
+              Resources: v1.ResourceRequirements{
+                Requests: v1.ResourceList{},
+                Limits:   v1.ResourceList{},
+              },
             },
           },
         },
       },
-      expected: &ResourceConfig{CpuShares: &minShares},
+      expected: &ResourceConfig{
+        CpuShares: &minShares,
+      },
     },
     "burstable-no-limits": {
       pod: &v1.Pod{
         Spec: v1.PodSpec{
           Containers: []v1.Container{
             {
-              Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("", "")),
+              Resources: v1.ResourceRequirements{
+                Requests: v1.ResourceList{
+                  v1.ResourceCPU:    resource.MustParse("100m"),
+                  v1.ResourceMemory: resource.MustParse("100Mi"),
+                },
+                Limits: v1.ResourceList{},
+              },
             },
           },
         },
@@ -91,39 +105,86 @@ func TestResourceConfigForPod(t *testing.T) {
       Spec: v1.PodSpec{
         Containers: []v1.Container{
           {
-            Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("200m", "200Mi")),
+            Resources: v1.ResourceRequirements{
+              Requests: v1.ResourceList{
+                v1.ResourceCPU:    resource.MustParse("100m"),
+                v1.ResourceMemory: resource.MustParse("100Mi"),
+              },
+              Limits: v1.ResourceList{
+                v1.ResourceCPU:    resource.MustParse("200m"),
+                v1.ResourceMemory: resource.MustParse("200Mi"),
+              },
+            },
           },
         },
       },
     },
-    expected: &ResourceConfig{CpuShares: &burstableShares, CpuQuota: &burstableQuota, CpuPeriod: &burstablePeriod, Memory: &burstableMemory},
+    expected: &ResourceConfig{
+      CpuShares: &burstableShares,
+      CpuQuota:  &burstableQuota,
+      CpuPeriod: &burstablePeriod,
+      Memory:    &burstableMemory,
+    },
     },
     "burstable-partial-limits": {
       pod: &v1.Pod{
         Spec: v1.PodSpec{
           Containers: []v1.Container{
             {
-              Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("200m", "200Mi")),
+              Resources: v1.ResourceRequirements{
+                Requests: v1.ResourceList{
+                  v1.ResourceCPU:    resource.MustParse("100m"),
+                  v1.ResourceMemory: resource.MustParse("100Mi"),
+                },
+                Limits: v1.ResourceList{
+                  v1.ResourceCPU:    resource.MustParse("200m"),
+                  v1.ResourceMemory: resource.MustParse("200Mi"),
+                },
+              },
             },
             {
-              Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("", "")),
+              Resources: v1.ResourceRequirements{
+                Requests: v1.ResourceList{
+                  v1.ResourceCPU:    resource.MustParse("100m"),
+                  v1.ResourceMemory: resource.MustParse("100Mi"),
+                },
+                Limits: v1.ResourceList{},
+              },
            },
          },
        },
      },
-     expected: &ResourceConfig{CpuShares: &burstablePartialShares},
+     expected: &ResourceConfig{
+       CpuShares: &burstablePartialShares,
+     },
     },
     "guaranteed": {
       pod: &v1.Pod{
         Spec: v1.PodSpec{
           Containers: []v1.Container{
             {
-              Resources: getResourceRequirements(getResourceList("100m", "100Mi"), getResourceList("100m", "100Mi")),
+              Resources: v1.ResourceRequirements{
+                Requests: v1.ResourceList{
+                  v1.ResourceCPU:           resource.MustParse("100m"),
+                  v1.ResourceMemory:        resource.MustParse("100Mi"),
+                  v1.ResourceCPUPeriodUsec: resource.MustParse("10000"),
+                },
+                Limits: v1.ResourceList{
+                  v1.ResourceCPU:           resource.MustParse("100m"),
+                  v1.ResourceMemory:        resource.MustParse("100Mi"),
+                  v1.ResourceCPUPeriodUsec: resource.MustParse("10000"),
+                },
+              },
             },
           },
         },
       },
-      expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedPeriod, Memory: &guaranteedMemory},
+      expected: &ResourceConfig{
+        CpuShares: &guaranteedShares,
+        CpuQuota:  &guaranteedQuota,
+        CpuPeriod: &guaranteedPeriod,
+        Memory:    &guaranteedMemory,
+      },
     },
   }
   for testName, testCase := range testCases {
@@ -145,55 +206,65 @@ func TestResourceConfigForPod(t *testing.T) {
 
 func TestMilliCPUToQuota(t *testing.T) {
   testCases := []struct {
-    input  int64
-    quota  int64
+    cpu    int64
     period int64
+    quota  int64
   }{
     {
-      input:  int64(0),
+      cpu:    int64(0),
+      period: int64(100000),
       quota:  int64(0),
-      period: int64(0),
     },
     {
-      input:  int64(5),
-      quota:  int64(1000),
+      cpu:    int64(5),
       period: int64(100000),
+      quota:  int64(1000),
     },
     {
-      input:  int64(9),
-      quota:  int64(1000),
+      cpu:    int64(9),
       period: int64(100000),
+      quota:  int64(1000),
     },
     {
-      input:  int64(10),
-      quota:  int64(1000),
+      cpu:    int64(10),
       period: int64(100000),
+      quota:  int64(1000),
     },
     {
-      input:  int64(200),
-      quota:  int64(20000),
+      cpu:    int64(200),
       period: int64(100000),
+      quota:  int64(20000),
     },
     {
-      input:  int64(500),
-      quota:  int64(50000),
+      cpu:    int64(500),
       period: int64(100000),
+      quota:  int64(50000),
     },
     {
-      input:  int64(1000),
-      quota:  int64(100000),
+      cpu:    int64(1000),
       period: int64(100000),
+      quota:  int64(100000),
     },
     {
-      input:  int64(1500),
-      quota:  int64(150000),
+      cpu:    int64(1500),
       period: int64(100000),
+      quota:  int64(150000),
+    },
+    {
+      cpu:    int64(1500),
+      period: int64(10000),
+      quota:  int64(15000),
+    },
+    {
+      cpu:    int64(250),
+      period: int64(5000),
+      quota:  int64(1250),
     },
   }
   for _, testCase := range testCases {
-    quota, period := MilliCPUToQuota(testCase.input)
-    if quota != testCase.quota || period != testCase.period {
-      t.Errorf("Input %v, expected quota %v period %v, but got quota %v period %v", testCase.input, testCase.quota, testCase.period, quota, period)
+    quota := MilliCPUToQuota(testCase.cpu, testCase.period)
+    if quota != testCase.quota {
+      t.Errorf("Input (cpu=%d, period=%d), expected quota=%d but got quota=%d", testCase.cpu, testCase.period, testCase.quota, quota)
     }
   }
 }
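One detail the new test table leans on: quotas are floored at MinQuotaPeriod (1000us), which is why the 5m, 9m and 10m rows all expect a quota of 1000 under the default 100ms period; the unfloored values would be 500us, 900us and 1000us. A quick, illustrative check of that arithmetic (not part of the patch):

package main

import "fmt"

func main() {
	const period = int64(100000) // the default 100ms CFS period, in microseconds
	for _, milliCPU := range []int64{5, 9, 10} {
		raw := milliCPU * period / 1000 // quota before the 1ms floor is applied
		fmt.Printf("%dm -> raw quota %dus, floored at 1000us\n", milliCPU, raw)
	}
}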
diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go
index 8fcbe29a33ceb..53014a250abd8 100644
--- a/pkg/kubelet/kuberuntime/helpers.go
+++ b/pkg/kubelet/kuberuntime/helpers.go
@@ -36,8 +36,8 @@ const (
   milliCPUToCPU = 1000
 
   // 100000 is equivalent to 100ms
-  quotaPeriod    = 100 * minQuotaPeriod
-  minQuotaPeriod = 1000
+  defaultQuotaPeriod int64 = 100000
+  minQuotaPeriod     int64 = 1000
 )
 
 var (
@@ -176,22 +176,21 @@ func milliCPUToShares(milliCPU int64) int64 {
   return shares
 }
 
-// milliCPUToQuota converts milliCPU to CFS quota and period values
-func milliCPUToQuota(milliCPU int64) (quota int64, period int64) {
+// milliCPUToQuota takes milliCPU (along with a CFS period, in usec) and returns
+// a CFS quota value
+func milliCPUToQuota(milliCPU, period int64) (quota int64) {
   // CFS quota is measured in two values:
   //  - cfs_period_us=100ms (the amount of time to measure usage across)
   //  - cfs_quota=20ms (the amount of cpu time allowed to be used across a period)
   // so in the above example, you are limited to 20% of a single CPU
   // for multi-cpu environments, you just scale equivalent amounts
+
   if milliCPU == 0 {
     return
   }
 
-  // we set the period to 100ms by default
-  period = quotaPeriod
-
   // we then convert your milliCPU to a value normalized over a period
-  quota = (milliCPU * quotaPeriod) / milliCPUToCPU
+  quota = (milliCPU * period) / milliCPUToCPU
 
   // quota needs to be a minimum of 1ms.
   if quota < minQuotaPeriod {
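For context, the quota and period computed by these helpers are what the container runtime ultimately writes into the container's CPU cgroup as cpu.cfs_quota_us and cpu.cfs_period_us (cgroup v1). A rough sketch of how one might confirm the values the kernel is actually enforcing on a node; the cgroup path and layout below are illustrative and not defined by this patch:

package main

import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
)

func main() {
	// Example path only; the real location depends on the node's cgroup driver
	// and hierarchy.
	cgroup := "/sys/fs/cgroup/cpu/kubepods/pod-example/container-example"
	for _, name := range []string{"cpu.cfs_period_us", "cpu.cfs_quota_us"} {
		b, err := ioutil.ReadFile(filepath.Join(cgroup, name))
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue
		}
		fmt.Printf("%s = %s", name, b)
	}
}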
diff --git a/pkg/kubelet/kuberuntime/kuberuntime_container.go b/pkg/kubelet/kuberuntime/kuberuntime_container.go
index 7b8a15484daf8..d7347d48916ee 100644
--- a/pkg/kubelet/kuberuntime/kuberuntime_container.go
+++ b/pkg/kubelet/kuberuntime/kuberuntime_container.go
@@ -256,9 +256,13 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
   lc.Resources.OomScoreAdj = oomScoreAdj
 
   if m.cpuCFSQuota {
+    cpuPeriod := defaultQuotaPeriod
+    if period, found := container.Resources.Limits[v1.ResourceCPUPeriodUsec]; found {
+      cpuPeriod = period.Value()
+    }
     // if cpuLimit.Amount is nil, then the appropriate default value is returned
     // to allow full usage of cpu resource.
-    cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
+    cpuQuota := milliCPUToQuota(cpuLimit.MilliValue(), cpuPeriod)
     lc.Resources.CpuQuota = cpuQuota
     lc.Resources.CpuPeriod = cpuPeriod
   }
diff --git a/staging/src/k8s.io/api/core/v1/types.go b/staging/src/k8s.io/api/core/v1/types.go
index 6b79e441e9d8b..198900b2d9dff 100644
--- a/staging/src/k8s.io/api/core/v1/types.go
+++ b/staging/src/k8s.io/api/core/v1/types.go
@@ -3599,6 +3599,13 @@ const (
   // NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
   ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
   // Number of Pods that may be running on this Node: see ResourcePods
+
+  // -- Monzo-specific
+
+  // CPU throttling period, in microseconds.
+  // NOTE: Only set this value yourself if you know very well how to tune the
+  // Linux CFS scheduler, or in consultation with the Platform team.
+  ResourceCPUPeriodUsec ResourceName = "monzo.com/cpu-period"
 )
 
 const (
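With the new resource name in place, a workload opts into a non-default period by declaring it as a limit alongside its CPU limit. Roughly what that looks like when building a container spec against this fork's API types, mirroring the style of the tests above; the container name, image and the 5ms value are purely illustrative, and the field is only honoured by kubelets carrying this patch:

package main

import (
	"fmt"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	container := v1.Container{
		Name:  "service",
		Image: "example/service:latest",
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("100m"),
				v1.ResourceMemory: resource.MustParse("128Mi"),
			},
			Limits: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("200m"),
				v1.ResourceMemory: resource.MustParse("128Mi"),
				// monzo.com/cpu-period: ask for a 5ms CFS period instead of the
				// 100ms default (value in microseconds).
				v1.ResourceCPUPeriodUsec: resource.MustParse("5000"),
			},
		},
	}
	fmt.Printf("%+v\n", container.Resources.Limits)
}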