diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager.go b/cluster-autoscaler/cloudprovider/gce/gce_manager.go index 7cf662bcbe22..7ed9100374e9 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager.go @@ -26,6 +26,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -478,7 +479,7 @@ func (m *gceManagerImpl) getCpuAndMemoryForMachineType(machineType string, zone } m.cache.AddMachineToCache(machineType, zone, machine) } - return machine.GuestCpus, machine.MemoryMb * bytesPerMB, nil + return machine.GuestCpus, machine.MemoryMb * units.MiB, nil } func parseCustomMachineType(machineType string) (cpu, mem int64, err error) { @@ -492,6 +493,6 @@ func parseCustomMachineType(machineType string) (cpu, mem int64, err error) { return 0, 0, fmt.Errorf("failed to parse all params in %s", machineType) } // Mb to bytes - mem = mem * bytesPerMB + mem = mem * units.MiB return } diff --git a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go index 54d0ab79c207..af2ebf4bbdcd 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_manager_test.go @@ -24,6 +24,7 @@ import ( "time" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -1092,14 +1093,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) { cpu, mem, err := g.getCpuAndMemoryForMachineType("custom-8-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(8), cpu) - assert.Equal(t, int64(2*bytesPerMB), mem) + assert.Equal(t, int64(2*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type found in cache. cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-1", zoneB) assert.NoError(t, err) assert.Equal(t, int64(1), cpu) - assert.Equal(t, int64(1*bytesPerMB), mem) + assert.Equal(t, int64(1*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type not found in cache. @@ -1107,14 +1108,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) { cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(3840*bytesPerMB), mem) + assert.Equal(t, int64(3840*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type cached. cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(3840*bytesPerMB), mem) + assert.Equal(t, int64(3840*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type not found in the zone. 
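Aside (not part of the patch): the tests above exercise the switch from the old `bytesPerMB` (10^6) factor to `units.MiB` (2^20) when converting the GCE API's `MemoryMb` field to bytes. A minimal, self-contained sketch of the numerical effect for the 3840 value used in the mocked n1-standard-2 response; the constant names here are local to the example, not identifiers from the patch:

```go
package main

import "fmt"

func main() {
	// 3840 is the MemoryMb value the mocked GCE API returns for
	// n1-standard-2 in the tests above.
	const memoryMb = 3840

	const bytesPerMB = 1000 * 1000 // old decimal factor removed by this patch
	const mib = 1024 * 1024        // new binary factor, units.MiB

	fmt.Println(memoryMb * bytesPerMB) // 3840000000 bytes (~3.58 GiB)
	fmt.Println(memoryMb * mib)        // 4026531840 bytes (exactly 3.75 GiB)
}
```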
@@ -1129,7 +1130,7 @@ func TestParseCustomMachineType(t *testing.T) { cpu, mem, err := parseCustomMachineType("custom-2-2816") assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(2816*bytesPerMB), mem) + assert.Equal(t, int64(2816*units.MiB), mem) cpu, mem, err = parseCustomMachineType("other-a2-2816") assert.Error(t, err) cpu, mem, err = parseCustomMachineType("other-2-2816") diff --git a/cluster-autoscaler/cloudprovider/gce/gce_price_model.go b/cluster-autoscaler/cloudprovider/gce/gce_price_model.go index 2031ec13ceed..c89368f3d3da 100644 --- a/cluster-autoscaler/cloudprovider/gce/gce_price_model.go +++ b/cluster-autoscaler/cloudprovider/gce/gce_price_model.go @@ -147,7 +147,7 @@ func getBasePrice(resources apiv1.ResourceList, startTime time.Time, endTime tim cpu := resources[apiv1.ResourceCPU] mem := resources[apiv1.ResourceMemory] price += float64(cpu.MilliValue()) / 1000.0 * cpuPricePerHour * hours - price += float64(mem.Value()) / float64(units.Gigabyte) * memoryPricePerHourPerGb * hours + price += float64(mem.Value()) / float64(units.GiB) * memoryPricePerHourPerGb * hours return price } diff --git a/cluster-autoscaler/cloudprovider/gce/templates.go b/cluster-autoscaler/cloudprovider/gce/templates.go index bf8ba7245761..4b807a327e65 100644 --- a/cluster-autoscaler/cloudprovider/gce/templates.go +++ b/cluster-autoscaler/cloudprovider/gce/templates.go @@ -23,12 +23,13 @@ import ( "strings" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" gce "google.golang.org/api/compute/v1" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" "github.com/ghodss/yaml" @@ -36,14 +37,19 @@ import ( ) const ( - mbPerGB = 1000 - bytesPerMB = 1000 * 1000 - millicoresPerCore = 1000 - // Kubelet "evictionHard: {memory.available}" is subtracted from - // capacity when calculating allocatable (on top of kube-reserved). + // KubeletEvictionHardMemory is subtracted from capacity + // when calculating allocatable (on top of kube-reserved). + // Equals kubelet "evictionHard: {memory.available}" // We don't have a good place to get it from, but it has been hard-coded // to 100Mi since at least k8s 1.4. - kubeletEvictionHardMemory = 100 * 1024 * 1024 + KubeletEvictionHardMemory = 100 * units.MiB + + // Kernel reserved memory is subtracted when calculating total memory. + kernelReservedRatio = 64 + kernelReservedMemory = 16 * units.MiB + // Reserved memory for software IO TLB + swiotlbReservedMemory = 64 * units.MiB + swiotlbThresholdMemory = 3 * units.GiB ) // GceTemplateBuilder builds templates for GCE nodes. @@ -59,13 +65,14 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator return count } -// BuildCapacity builds a list of resource capacities for a node. -func (t *GceTemplateBuilder) BuildCapacity(machineType string, accelerators []*gce.AcceleratorConfig, zone string, cpu int64, mem int64) (apiv1.ResourceList, error) { +// BuildCapacity builds a list of resource capacities given list of hardware. +func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig) (apiv1.ResourceList, error) { capacity := apiv1.ResourceList{} // TODO: get a real value. 
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI) capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI) - capacity[apiv1.ResourceMemory] = *resource.NewQuantity(mem, resource.DecimalSI) + memTotal := mem - calculateKernelReserved(mem) + capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI) if accelerators != nil && len(accelerators) > 0 { capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(t.getAcceleratorCount(accelerators), resource.DecimalSI) @@ -91,13 +98,10 @@ func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.Resource if err != nil { return nil, err } - if quantity, found := reserved[apiv1.ResourceMemory]; found { - reserved[apiv1.ResourceMemory] = *resource.NewQuantity(quantity.Value()+kubeletEvictionHardMemory, resource.BinarySI) - } return t.CalculateAllocatable(capacity, reserved), nil } -// CalculateAllocatable computes allocatable resources substracting reserved values from corresponding capacity. +// CalculateAllocatable computes allocatable resources subtracting reserved values from corresponding capacity. func (t *GceTemplateBuilder) CalculateAllocatable(capacity, reserved apiv1.ResourceList) apiv1.ResourceList { allocatable := apiv1.ResourceList{} for key, value := range capacity { @@ -105,6 +109,9 @@ func (t *GceTemplateBuilder) CalculateAllocatable(capacity, reserved apiv1.Resou if reservedQuantity, found := reserved[key]; found { quantity.Sub(reservedQuantity) } + if key == apiv1.ResourceMemory { + quantity.Sub(*resource.NewQuantity(KubeletEvictionHardMemory, resource.BinarySI)) + } allocatable[key] = quantity } return allocatable @@ -126,7 +133,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan Labels: map[string]string{}, } - capacity, err := t.BuildCapacity(template.Properties.MachineType, template.Properties.GuestAccelerators, mig.GceRef().Zone, cpu, mem) + capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators) if err != nil { return nil, err } @@ -333,3 +340,16 @@ func buildTaints(kubeEnvTaints map[string]string) ([]apiv1.Taint, error) { } return taints, nil } + +// calculateKernelReserved computes how much memory Linux kernel will reserve. 
+// TODO: account for crashkernel reservation on RHEL / CentOS +func calculateKernelReserved(mem int64) int64 { + // Account for memory reserved by kernel + reserved := int64(mem / kernelReservedRatio) + reserved += kernelReservedMemory + // Account for software IO TLB allocation if memory requires 64bit addressing + if mem > swiotlbThresholdMemory { + reserved += swiotlbReservedMemory + } + return reserved +} diff --git a/cluster-autoscaler/cloudprovider/gce/templates_test.go b/cluster-autoscaler/cloudprovider/gce/templates_test.go index 4dfc829262f5..88b0c823da98 100644 --- a/cluster-autoscaler/cloudprovider/gce/templates_test.go +++ b/cluster-autoscaler/cloudprovider/gce/templates_test.go @@ -22,6 +22,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" gpuUtils "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" gce "google.golang.org/api/compute/v1" apiv1 "k8s.io/api/core/v1" @@ -34,104 +35,138 @@ import ( func TestBuildNodeFromTemplateSetsResources(t *testing.T) { type testCase struct { - kubeEnv string - name string - machineType string - accelerators []*gce.AcceleratorConfig - mig Mig - capacityCpu int64 - capacityMemory int64 - allocatableCpu string - allocatableMemory string - gpuCount int64 - expectedErr bool + scenario string + kubeEnv string + accelerators []*gce.AcceleratorConfig + mig Mig + physicalCpu int64 + physicalMemory int64 + kubeReserved bool + reservedCpu string + reservedMemory string + expectedGpuCount int64 + expectedErr bool } - testCases := []testCase{{ - kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + - "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + - "DNS_SERVER_IP: '10.0.0.10'\n" + - fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) + - "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", - name: "nodeName", - machineType: "custom-8-2", - accelerators: []*gce.AcceleratorConfig{ - {AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3}, - {AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8}, - }, - mig: &gceMig{ - gceRef: GceRef{ - Name: "some-name", - Project: "some-proj", - Zone: "us-central1-b", + testCases := []testCase{ + { + scenario: "kube-reserved present in kube-env", + kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + + "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + + "DNS_SERVER_IP: '10.0.0.10'\n" + + fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) + + "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", + accelerators: []*gce.AcceleratorConfig{ + {AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3}, + {AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8}, }, + physicalCpu: 8, + physicalMemory: 200 * units.MiB, + kubeReserved: true, + reservedCpu: "1000m", + reservedMemory: fmt.Sprintf("%v", 1024*1024), + expectedGpuCount: 11, + expectedErr: false, }, - capacityCpu: 8, - capacityMemory: 200 * 1024 * 1024, - allocatableCpu: "7000m", - allocatableMemory: fmt.Sprintf("%v", 99*1024*1024), - gpuCount: 11, - expectedErr: false, - }, { + scenario: "no kube-reserved in kube-env", kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + "DNS_SERVER_IP: 
'10.0.0.10'\n" + "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", - name: "nodeName", - machineType: "custom-8-2", - mig: &gceMig{ - gceRef: GceRef{ - Name: "some-name", - Project: "some-proj", - Zone: "us-central1-b", - }, - }, - capacityCpu: 8, - capacityMemory: 2 * 1024 * 1024, - allocatableCpu: "8000m", - allocatableMemory: fmt.Sprintf("%v", 2*1024*1024), - expectedErr: false, + physicalCpu: 8, + physicalMemory: 200 * units.MiB, + kubeReserved: false, + expectedGpuCount: 11, + expectedErr: false, }, { + scenario: "totally messed up kube-env", kubeEnv: "This kube-env is totally messed up", - name: "nodeName", - machineType: "custom-8-2", - mig: &gceMig{ + expectedErr: true, + }, + } + for _, tc := range testCases { + t.Run(tc.scenario, func(t *testing.T) { + tb := &GceTemplateBuilder{} + mig := &gceMig{ gceRef: GceRef{ Name: "some-name", Project: "some-proj", Zone: "us-central1-b", }, - }, - expectedErr: true, + } + template := &gce.InstanceTemplate{ + Name: "node-name", + Properties: &gce.InstanceProperties{ + GuestAccelerators: tc.accelerators, + Metadata: &gce.Metadata{ + Items: []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}, + }, + MachineType: "irrelevant-type", + }, + } + node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory) + if tc.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators) + assert.NoError(t, err) + assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity) + if !tc.kubeReserved { + assertEqualResourceLists(t, "Allocatable", capacity, node.Status.Allocatable) + } else { + reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0) + assert.NoError(t, err) + allocatable := tb.CalculateAllocatable(capacity, reserved) + assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable) + } + } + }) + } +} + +func TestCalculateAllocatable(t *testing.T) { + type testCase struct { + scenario string + capacityCpu string + reservedCpu string + allocatableCpu string + capacityMemory string + reservedMemory string + allocatableMemory string + } + testCases := []testCase{ + { + scenario: "no reservations", + capacityCpu: "8", + reservedCpu: "0", + allocatableCpu: "8", + capacityMemory: fmt.Sprintf("%v", 200*1024*1024), + reservedMemory: "0", + allocatableMemory: fmt.Sprintf("%v", 200*1024*1024-KubeletEvictionHardMemory), + }, + { + scenario: "reserved cpu and memory", + capacityCpu: "8", + reservedCpu: "1000m", + allocatableCpu: "7000m", + capacityMemory: fmt.Sprintf("%v", 200*1024*1024), + reservedMemory: fmt.Sprintf("%v", 50*1024*1024), + allocatableMemory: fmt.Sprintf("%v", 150*1024*1024-KubeletEvictionHardMemory), }, } for _, tc := range testCases { - tb := &GceTemplateBuilder{} - template := &gce.InstanceTemplate{ - Name: tc.name, - Properties: &gce.InstanceProperties{ - GuestAccelerators: tc.accelerators, - Metadata: &gce.Metadata{ - Items: []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}, - }, - MachineType: tc.machineType, - }, - } - node, err := tb.BuildNodeFromTemplate(tc.mig, template, tc.capacityCpu, tc.capacityMemory) - if tc.expectedErr { - assert.Error(t, err) - } else { + t.Run(tc.scenario, func(t *testing.T) { + tb := &GceTemplateBuilder{} + capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, 0) assert.NoError(t, err) - podsQuantity, _ := resource.ParseQuantity("110") - capacity, err := 
makeResourceList(fmt.Sprintf("%dm", tc.capacityCpu*1000), fmt.Sprintf("%v", tc.capacityMemory), tc.gpuCount) - capacity[apiv1.ResourcePods] = podsQuantity + reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0) assert.NoError(t, err) - allocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount) - allocatable[apiv1.ResourcePods] = podsQuantity + expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, 0) assert.NoError(t, err) - assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity) - assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable) - } + allocatable := tb.CalculateAllocatable(capacity, reserved) + assertEqualResourceLists(t, "Allocatable", expectedAllocatable, allocatable) + }) } } @@ -498,6 +533,80 @@ func TestParseKubeReserved(t *testing.T) { } } +func TestBuildCapacityMemory(t *testing.T) { + type testCase struct { + physicalMemory int64 + capacityMemory int64 + physicalCpu int64 + } + testCases := []testCase{ + { + physicalMemory: 2 * units.GiB, + capacityMemory: 2*units.GiB - 32*units.MiB - kernelReservedMemory, + physicalCpu: 1, + }, + { + physicalMemory: 4 * units.GiB, + capacityMemory: 4*units.GiB - 64*units.MiB - kernelReservedMemory - swiotlbReservedMemory, + physicalCpu: 2, + }, + { + physicalMemory: 128 * units.GiB, + capacityMemory: 128*units.GiB - 2*units.GiB - kernelReservedMemory - swiotlbReservedMemory, + physicalCpu: 32, + }, + } + for idx, tc := range testCases { + t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) { + tb := &GceTemplateBuilder{} + capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, make([]*gce.AcceleratorConfig, 0)) + assert.NoError(t, err) + expected, err := makeResourceList2(tc.physicalCpu, tc.capacityMemory, 0, 110) + assert.NoError(t, err) + assertEqualResourceLists(t, "Capacity", capacity, expected) + }) + } +} + +func TestCalculateKernelReserved(t *testing.T) { + type testCase struct { + physicalMemory int64 + reservedMemory int64 + } + testCases := []testCase{ + { + physicalMemory: 256 * units.MiB, + reservedMemory: 4*units.MiB + kernelReservedMemory, + }, + { + physicalMemory: 2 * units.GiB, + reservedMemory: 32*units.MiB + kernelReservedMemory, + }, + { + physicalMemory: 3 * units.GiB, + reservedMemory: 48*units.MiB + kernelReservedMemory, + }, + { + physicalMemory: 3.25 * units.GiB, + reservedMemory: 52*units.MiB + kernelReservedMemory + swiotlbReservedMemory, + }, + { + physicalMemory: 4 * units.GiB, + reservedMemory: 64*units.MiB + kernelReservedMemory + swiotlbReservedMemory, + }, + { + physicalMemory: 128 * units.GiB, + reservedMemory: 2*units.GiB + kernelReservedMemory + swiotlbReservedMemory, + }, + } + for idx, tc := range testCases { + t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) { + reserved := calculateKernelReserved(tc.physicalMemory) + assert.Equal(t, tc.reservedMemory, reserved) + }) + } +} + func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool { set := make(map[apiv1.Taint]bool) for _, taint := range taints { @@ -528,6 +637,20 @@ func makeResourceList(cpu string, memory string, gpu int64) (apiv1.ResourceList, return result, nil } +func makeResourceList2(cpu int64, memory int64, gpu int64, pods int64) (apiv1.ResourceList, error) { + result := apiv1.ResourceList{} + result[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI) + result[apiv1.ResourceMemory] = *resource.NewQuantity(memory, resource.BinarySI) + if gpu > 0 { + result[gpuUtils.ResourceNvidiaGPU] = 
*resource.NewQuantity(gpu, resource.DecimalSI) + } + if pods > 0 { + result[apiv1.ResourcePods] = *resource.NewQuantity(pods, resource.DecimalSI) + } + return result, nil +} + func assertEqualResourceLists(t *testing.T, name string, expected, actual apiv1.ResourceList) { + t.Helper() assert.True(t, quota.V1Equals(expected, actual), "%q unequal:\nExpected:%v\nActual:%v", name, expected, actual) } diff --git a/cluster-autoscaler/cloudprovider/gke/autoscaling_gke_client_v1beta1.go b/cluster-autoscaler/cloudprovider/gke/autoscaling_gke_client_v1beta1.go index c61dcd0ebcc0..8517fe21ca4d 100644 --- a/cluster-autoscaler/cloudprovider/gke/autoscaling_gke_client_v1beta1.go +++ b/cluster-autoscaler/cloudprovider/gke/autoscaling_gke_client_v1beta1.go @@ -117,10 +117,10 @@ func buildResourceLimiter(cluster *gke_api_beta.Cluster) *cloudprovider.Resource // GKE API provides memory in GB, but ResourceLimiter expects them in bytes if _, found := minLimits[cloudprovider.ResourceNameMemory]; found { - minLimits[cloudprovider.ResourceNameMemory] = minLimits[cloudprovider.ResourceNameMemory] * units.Gigabyte + minLimits[cloudprovider.ResourceNameMemory] = minLimits[cloudprovider.ResourceNameMemory] * units.GiB } if _, found := maxLimits[cloudprovider.ResourceNameMemory]; found { - maxLimits[cloudprovider.ResourceNameMemory] = maxLimits[cloudprovider.ResourceNameMemory] * units.Gigabyte + maxLimits[cloudprovider.ResourceNameMemory] = maxLimits[cloudprovider.ResourceNameMemory] * units.GiB } return cloudprovider.NewResourceLimiter(minLimits, maxLimits) diff --git a/cluster-autoscaler/cloudprovider/gke/gke_manager.go b/cluster-autoscaler/cloudprovider/gke/gke_manager.go index 77843f8d5926..c4d90511a56c 100644 --- a/cluster-autoscaler/cloudprovider/gke/gke_manager.go +++ b/cluster-autoscaler/cloudprovider/gke/gke_manager.go @@ -29,6 +29,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce" "k8s.io/autoscaler/cluster-autoscaler/config/dynamic" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -622,7 +623,7 @@ func (m *gkeManagerImpl) getCpuAndMemoryForMachineType(machineType string, zone } m.cache.AddMachineToCache(machineType, zone, machine) } - return machine.GuestCpus, machine.MemoryMb * bytesPerMB, nil + return machine.GuestCpus, machine.MemoryMb * units.MiB, nil } func parseCustomMachineType(machineType string) (cpu, mem int64, err error) { @@ -636,6 +637,6 @@ func parseCustomMachineType(machineType string) (cpu, mem int64, err error) { return 0, 0, fmt.Errorf("failed to parse all params in %s", machineType) } // Mb to bytes - mem = mem * bytesPerMB + mem = mem * units.MiB return } diff --git a/cluster-autoscaler/cloudprovider/gke/gke_manager_test.go b/cluster-autoscaler/cloudprovider/gke/gke_manager_test.go index 58fa1191de2c..997dc04da5cf 100644 --- a/cluster-autoscaler/cloudprovider/gke/gke_manager_test.go +++ b/cluster-autoscaler/cloudprovider/gke/gke_manager_test.go @@ -24,6 +24,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce" "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -1158,14 +1159,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) { cpu, mem, err := g.getCpuAndMemoryForMachineType("custom-8-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(8), cpu) - assert.Equal(t, int64(2*bytesPerMB), mem) + assert.Equal(t, int64(2*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type found in cache. cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-1", zoneB) assert.NoError(t, err) assert.Equal(t, int64(1), cpu) - assert.Equal(t, int64(1*bytesPerMB), mem) + assert.Equal(t, int64(1*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type not found in cache. @@ -1173,14 +1174,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) { cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(3840*bytesPerMB), mem) + assert.Equal(t, int64(3840*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type cached. cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB) assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(3840*bytesPerMB), mem) + assert.Equal(t, int64(3840*units.MiB), mem) mock.AssertExpectationsForObjects(t, server) // Standard machine type not found in the zone. @@ -1195,7 +1196,7 @@ func TestParseCustomMachineType(t *testing.T) { cpu, mem, err := parseCustomMachineType("custom-2-2816") assert.NoError(t, err) assert.Equal(t, int64(2), cpu) - assert.Equal(t, int64(2816*bytesPerMB), mem) + assert.Equal(t, int64(2816*units.MiB), mem) cpu, mem, err = parseCustomMachineType("other-a2-2816") assert.Error(t, err) cpu, mem, err = parseCustomMachineType("other-2-2816") diff --git a/cluster-autoscaler/cloudprovider/gke/templates.go b/cluster-autoscaler/cloudprovider/gke/templates.go index 1a8c9c4dfeb5..3c75588d472e 100644 --- a/cluster-autoscaler/cloudprovider/gke/templates.go +++ b/cluster-autoscaler/cloudprovider/gke/templates.go @@ -22,22 +22,17 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce" + "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" apiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" ) const ( mbPerGB = 1000 - bytesPerMB = 1000 * 1000 millicoresPerCore = 1000 - // Kubelet "evictionHard: {memory.available}" is subtracted from - // capacity when calculating allocatable (on top of kube-reserved). - // We don't have a good place to get it from, but it has been hard-coded - // to 100Mi since at least k8s 1.4. - kubeletEvictionHardMemory = 100 * 1024 * 1024 ) // GkeTemplateBuilder builds templates for GKE cloud provider. @@ -61,7 +56,7 @@ func (t *GkeTemplateBuilder) BuildNodeFromMigSpec(mig *GkeMig, cpu int64, mem in Labels: map[string]string{}, } - capacity, err := t.BuildCapacity(mig.Spec().MachineType, nil, mig.GceRef().Zone, cpu, mem) + capacity, err := t.BuildCapacity(cpu, mem, nil) if err != nil { return nil, err } @@ -91,13 +86,10 @@ func (t *GkeTemplateBuilder) BuildNodeFromMigSpec(mig *GkeMig, cpu int64, mem in // BuildAllocatableFromCapacity builds node allocatable based only on node capacity. // Calculates reserved as a ratio of capacity. 
See calculateReserved for more details func (t *GkeTemplateBuilder) BuildAllocatableFromCapacity(capacity apiv1.ResourceList) apiv1.ResourceList { - memoryReserved := memoryReservedMB(capacity.Memory().Value() / bytesPerMB) + memoryReserved := memoryReservedMB(capacity.Memory().Value()/units.MiB) * units.MiB cpuReserved := cpuReservedMillicores(capacity.Cpu().MilliValue()) reserved := apiv1.ResourceList{} reserved[apiv1.ResourceCPU] = *resource.NewMilliQuantity(cpuReserved, resource.DecimalSI) - // Duplicating an upstream bug treating MB as MiB (we need to predict the end result accurately). - memoryReserved = memoryReserved * 1024 * 1024 - memoryReserved += kubeletEvictionHardMemory reserved[apiv1.ResourceMemory] = *resource.NewQuantity(memoryReserved, resource.BinarySI) return t.CalculateAllocatable(capacity, reserved) } diff --git a/cluster-autoscaler/cloudprovider/gke/templates_test.go b/cluster-autoscaler/cloudprovider/gke/templates_test.go index 0b3b854103ad..b921a90d6322 100644 --- a/cluster-autoscaler/cloudprovider/gke/templates_test.go +++ b/cluster-autoscaler/cloudprovider/gke/templates_test.go @@ -23,6 +23,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce" gpuUtils "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" gce_api "google.golang.org/api/compute/v1" apiv1 "k8s.io/api/core/v1" @@ -35,104 +36,93 @@ import ( func TestBuildNodeFromTemplateSetsResources(t *testing.T) { type testCase struct { - kubeEnv string - name string - machineType string - accelerators []*gce_api.AcceleratorConfig - mig gce.Mig - capacityCpu int64 - capacityMemory int64 - allocatableCpu string - allocatableMemory string - gpuCount int64 - expectedErr bool + scenario string + kubeEnv string + accelerators []*gce_api.AcceleratorConfig + mig gce.Mig + physicalCpu int64 + physicalMemory int64 + kubeReserved bool + reservedCpu string + reservedMemory string + expectedGpuCount int64 + expectedErr bool } - testCases := []testCase{{ - kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + - "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + - "DNS_SERVER_IP: '10.0.0.10'\n" + - fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) + - "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", - name: "nodeName", - machineType: "custom-8-2", - accelerators: []*gce_api.AcceleratorConfig{ - {AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3}, - {AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8}, - }, - mig: &GkeMig{ - gceRef: gce.GceRef{ - Name: "some-name", - Project: "some-proj", - Zone: "us-central1-b", + testCases := []testCase{ + { + scenario: "kube-reserved present in kube-env", + kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + + "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + + "DNS_SERVER_IP: '10.0.0.10'\n" + + fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) + + "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", + accelerators: []*gce_api.AcceleratorConfig{ + {AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3}, + {AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8}, }, + physicalCpu: 8, + physicalMemory: 200 * units.MiB, + kubeReserved: true, + 
reservedCpu: "1000m", + reservedMemory: fmt.Sprintf("%v", 1024*1024), + expectedGpuCount: 11, + expectedErr: false, }, - capacityCpu: 8, - capacityMemory: 200 * 1024 * 1024, - allocatableCpu: "7000m", - allocatableMemory: fmt.Sprintf("%v", 99*1024*1024), - gpuCount: 11, - expectedErr: false, - }, { + scenario: "no kube-reserved in kube-env", kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" + "NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" + "DNS_SERVER_IP: '10.0.0.10'\n" + "NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n", - name: "nodeName", - machineType: "custom-8-2", - mig: &GkeMig{ - gceRef: gce.GceRef{ - Name: "some-name", - Project: "some-proj", - Zone: "us-central1-b", - }, - }, - capacityCpu: 8, - capacityMemory: 2 * 1024 * 1024, - allocatableCpu: "8000m", - allocatableMemory: fmt.Sprintf("%v", 2*1024*1024), - expectedErr: false, + physicalCpu: 8, + physicalMemory: 200 * units.MiB, + kubeReserved: false, + expectedGpuCount: 11, + expectedErr: false, }, { + scenario: "totally messed up kube-env", kubeEnv: "This kube-env is totally messed up", - name: "nodeName", - machineType: "custom-8-2", - mig: &GkeMig{ + expectedErr: true, + }, + } + for _, tc := range testCases { + t.Run(tc.scenario, func(t *testing.T) { + tb := &GkeTemplateBuilder{} + mig := &GkeMig{ gceRef: gce.GceRef{ Name: "some-name", Project: "some-proj", Zone: "us-central1-b", }, - }, - expectedErr: true, - }, - } - for _, tc := range testCases { - tb := &GkeTemplateBuilder{} - template := &gce_api.InstanceTemplate{ - Name: tc.name, - Properties: &gce_api.InstanceProperties{ - GuestAccelerators: tc.accelerators, - Metadata: &gce_api.Metadata{ - Items: []*gce_api.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}, + } + template := &gce_api.InstanceTemplate{ + Name: "node-name", + Properties: &gce_api.InstanceProperties{ + GuestAccelerators: tc.accelerators, + Metadata: &gce_api.Metadata{ + Items: []*gce_api.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}, + }, + MachineType: "irrelevant-type", }, - MachineType: tc.machineType, - }, - } - node, err := tb.BuildNodeFromTemplate(tc.mig, template, tc.capacityCpu, tc.capacityMemory) - if tc.expectedErr { - assert.Error(t, err) - } else { - assert.NoError(t, err) - podsQuantity, _ := resource.ParseQuantity("110") - capacity, err := makeResourceList(fmt.Sprintf("%dm", tc.capacityCpu*1000), fmt.Sprintf("%v", tc.capacityMemory), tc.gpuCount) - capacity[apiv1.ResourcePods] = podsQuantity - assert.NoError(t, err) - allocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount) - allocatable[apiv1.ResourcePods] = podsQuantity - assert.NoError(t, err) - assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity) - assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable) - } + } + node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory) + if tc.expectedErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators) + assert.NoError(t, err) + assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity) + if !tc.kubeReserved { + assertEqualResourceLists(t, "Allocatable", capacity, node.Status.Allocatable) + } else { + reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0) + assert.NoError(t, err) + allocatable := tb.CalculateAllocatable(capacity, reserved) + assertEqualResourceLists(t, 
"Allocatable", allocatable, node.Status.Allocatable) + } + } + }) } } @@ -247,17 +237,17 @@ func TestBuildAllocatableFromCapacity(t *testing.T) { } testCases := []testCase{{ capacityCpu: "16000m", - capacityMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB), + capacityMemory: fmt.Sprintf("%v", 1*units.GB), allocatableCpu: "15890m", // Below threshold for reserving memory - allocatableMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB-kubeletEvictionHardMemory), + allocatableMemory: fmt.Sprintf("%v", 1*units.GB-gce.KubeletEvictionHardMemory), gpuCount: 1, }, { capacityCpu: "500m", - capacityMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB), + capacityMemory: fmt.Sprintf("%v", 200*1000*units.MiB), allocatableCpu: "470m", - // Final 1024*1024 because we're duplicating upstream bug using MB as MiB - allocatableMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB-0.25*1.1*mbPerGB*1024*1024-kubeletEvictionHardMemory), + // 10760 = 0.25*4G + 0.2*4G + 0.1*8G + 0.06*112G + 0.02*72G + allocatableMemory: fmt.Sprintf("%v", (200*1000-10760)*units.MiB-gce.KubeletEvictionHardMemory), }} for _, tc := range testCases { tb := GkeTemplateBuilder{} @@ -313,19 +303,19 @@ func TestCalculateReserved(t *testing.T) { { name: "between memory thresholds", function: memoryReservedMB, - capacity: 2 * mbPerGB, + capacity: 2000, expectedReserved: 500, // 0.5 Gb }, { name: "at a memory threshold boundary", function: memoryReservedMB, - capacity: 8 * mbPerGB, + capacity: 8000, expectedReserved: 1800, // 1.8 Gb }, { name: "exceeds highest memory threshold", function: memoryReservedMB, - capacity: 200 * mbPerGB, + capacity: 200000, expectedReserved: 10760, // 10.8 Gb }, { diff --git a/cluster-autoscaler/core/scale_down_test.go b/cluster-autoscaler/core/scale_down_test.go index 886d86c719a4..9fc8952c0a1f 100644 --- a/cluster-autoscaler/core/scale_down_test.go +++ b/cluster-autoscaler/core/scale_down_test.go @@ -819,7 +819,7 @@ var defaultScaleDownOptions = config.AutoscalingOptions{ MinCoresTotal: 0, MinMemoryTotal: 0, MaxCoresTotal: config.DefaultMaxClusterCores, - MaxMemoryTotal: config.DefaultMaxClusterMemory * units.Gigabyte, + MaxMemoryTotal: config.DefaultMaxClusterMemory * units.GiB, } func TestScaleDownEmptyMultipleNodeGroups(t *testing.T) { @@ -864,13 +864,13 @@ func TestScaleDownEmptyMinCoresLimitHit(t *testing.T) { func TestScaleDownEmptyMinMemoryLimitHit(t *testing.T) { options := defaultScaleDownOptions - options.MinMemoryTotal = 4000 * MB + options.MinMemoryTotal = 4000 * MiB config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 2000, 1000 * MB, 0, true, "ng1"}, - {"n2", 1000, 1000 * MB, 0, true, "ng1"}, - {"n3", 1000, 1000 * MB, 0, true, "ng1"}, - {"n4", 1000, 3000 * MB, 0, true, "ng1"}, + {"n1", 2000, 1000 * MiB, 0, true, "ng1"}, + {"n2", 1000, 1000 * MiB, 0, true, "ng1"}, + {"n3", 1000, 1000 * MiB, 0, true, "ng1"}, + {"n4", 1000, 3000 * MiB, 0, true, "ng1"}, }, options: options, expectedScaleDowns: []string{"n1", "n2"}, @@ -894,12 +894,12 @@ func TestScaleDownEmptyMinGpuLimitHit(t *testing.T) { } config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 1000, 1000 * MB, 1, true, "ng1"}, - {"n2", 1000, 1000 * MB, 1, true, "ng1"}, - {"n3", 1000, 1000 * MB, 1, true, "ng1"}, - {"n4", 1000, 1000 * MB, 1, true, "ng1"}, - {"n5", 1000, 1000 * MB, 1, true, "ng1"}, - {"n6", 1000, 1000 * MB, 1, true, "ng1"}, + {"n1", 1000, 1000 * MiB, 1, true, "ng1"}, + {"n2", 1000, 1000 * MiB, 1, true, "ng1"}, + {"n3", 1000, 1000 * MiB, 1, true, "ng1"}, + {"n4", 1000, 1000 * MiB, 1, true, "ng1"}, + {"n5", 1000, 1000 * MiB, 1, true, "ng1"}, + 
{"n6", 1000, 1000 * MiB, 1, true, "ng1"}, }, options: options, expectedScaleDowns: []string{"n1", "n2"}, @@ -1200,13 +1200,13 @@ func getCountOfChan(c chan string) int { func TestCalculateCoresAndMemoryTotal(t *testing.T) { nodeConfigs := []nodeConfig{ - {"n1", 2000, 7500 * MB, 0, true, "ng1"}, - {"n2", 2000, 7500 * MB, 0, true, "ng1"}, - {"n3", 2000, 7500 * MB, 0, true, "ng1"}, - {"n4", 12000, 8000 * MB, 0, true, "ng1"}, - {"n5", 16000, 7500 * MB, 0, true, "ng1"}, - {"n6", 8000, 6000 * MB, 0, true, "ng1"}, - {"n7", 6000, 16000 * MB, 0, true, "ng1"}, + {"n1", 2000, 7500 * MiB, 0, true, "ng1"}, + {"n2", 2000, 7500 * MiB, 0, true, "ng1"}, + {"n3", 2000, 7500 * MiB, 0, true, "ng1"}, + {"n4", 12000, 8000 * MiB, 0, true, "ng1"}, + {"n5", 16000, 7500 * MiB, 0, true, "ng1"}, + {"n6", 8000, 6000 * MiB, 0, true, "ng1"}, + {"n7", 6000, 16000 * MiB, 0, true, "ng1"}, } nodes := make([]*apiv1.Node, len(nodeConfigs)) for i, n := range nodeConfigs { @@ -1226,7 +1226,7 @@ func TestCalculateCoresAndMemoryTotal(t *testing.T) { coresTotal, memoryTotal := calculateScaleDownCoresMemoryTotal(nodes, time.Now()) assert.Equal(t, int64(42), coresTotal) - assert.Equal(t, int64(44000*MB), memoryTotal) + assert.Equal(t, int64(44000*MiB), memoryTotal) } func TestFilterOutMasters(t *testing.T) { diff --git a/cluster-autoscaler/core/scale_up_test.go b/cluster-autoscaler/core/scale_up_test.go index 5e6990ab3a4b..11ff8e8e76c9 100644 --- a/cluster-autoscaler/core/scale_up_test.go +++ b/cluster-autoscaler/core/scale_up_test.go @@ -46,7 +46,7 @@ import ( var defaultOptions = config.AutoscalingOptions{ EstimatorName: estimator.BinpackingEstimatorName, MaxCoresTotal: config.DefaultMaxClusterCores, - MaxMemoryTotal: config.DefaultMaxClusterMemory * units.Gigabyte, + MaxMemoryTotal: config.DefaultMaxClusterMemory * units.GiB, MinCoresTotal: 0, MinMemoryTotal: 0, } @@ -120,24 +120,22 @@ func TestScaleUpMaxCoresLimitHitWithNotAutoscaledGroup(t *testing.T) { simpleScaleUpTest(t, config) } -const MB = 1024 * 1024 - func TestScaleUpMaxMemoryLimitHit(t *testing.T) { options := defaultOptions - options.MaxMemoryTotal = 1300 * MB + options.MaxMemoryTotal = 1300 * MiB config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 2000, 100 * MB, 0, true, "ng1"}, - {"n2", 4000, 1000 * MB, 0, true, "ng2"}, + {"n1", 2000, 100 * MiB, 0, true, "ng1"}, + {"n2", 4000, 1000 * MiB, 0, true, "ng2"}, }, pods: []podConfig{ {"p1", 1000, 0, 0, "n1"}, {"p2", 3000, 0, 0, "n2"}, }, extraPods: []podConfig{ - {"p-new-1", 2000, 100 * MB, 0, ""}, - {"p-new-2", 2000, 100 * MB, 0, ""}, - {"p-new-3", 2000, 100 * MB, 0, ""}, + {"p-new-1", 2000, 100 * MiB, 0, ""}, + {"p-new-2", 2000, 100 * MiB, 0, ""}, + {"p-new-3", 2000, 100 * MiB, 0, ""}, }, scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 3}, expectedFinalScaleUp: groupSizeChange{groupName: "ng1", sizeChange: 2}, @@ -149,20 +147,20 @@ func TestScaleUpMaxMemoryLimitHit(t *testing.T) { func TestScaleUpMaxMemoryLimitHitWithNotAutoscaledGroup(t *testing.T) { options := defaultOptions - options.MaxMemoryTotal = 1300 * MB + options.MaxMemoryTotal = 1300 * MiB config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 2000, 100 * MB, 0, true, "ng1"}, - {"n2", 4000, 1000 * MB, 0, true, ""}, + {"n1", 2000, 100 * MiB, 0, true, "ng1"}, + {"n2", 4000, 1000 * MiB, 0, true, ""}, }, pods: []podConfig{ {"p1", 1000, 0, 0, "n1"}, {"p2", 3000, 0, 0, "n2"}, }, extraPods: []podConfig{ - {"p-new-1", 2000, 100 * MB, 0, ""}, - {"p-new-2", 2000, 100 * MB, 0, ""}, - {"p-new-3", 2000, 100 * MB, 0, ""}, + {"p-new-1", 2000, 
100 * MiB, 0, ""}, + {"p-new-2", 2000, 100 * MiB, 0, ""}, + {"p-new-3", 2000, 100 * MiB, 0, ""}, }, scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 3}, expectedFinalScaleUp: groupSizeChange{groupName: "ng1", sizeChange: 2}, @@ -177,17 +175,17 @@ func TestScaleUpCapToMaxTotalNodesLimit(t *testing.T) { options.MaxNodesTotal = 3 config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 2000, 100 * MB, 0, true, "ng1"}, - {"n2", 4000, 1000 * MB, 0, true, "ng2"}, + {"n1", 2000, 100 * MiB, 0, true, "ng1"}, + {"n2", 4000, 1000 * MiB, 0, true, "ng2"}, }, pods: []podConfig{ {"p1", 1000, 0, 0, "n1"}, {"p2", 3000, 0, 0, "n2"}, }, extraPods: []podConfig{ - {"p-new-1", 4000, 100 * MB, 0, ""}, - {"p-new-2", 4000, 100 * MB, 0, ""}, - {"p-new-3", 4000, 100 * MB, 0, ""}, + {"p-new-1", 4000, 100 * MiB, 0, ""}, + {"p-new-2", 4000, 100 * MiB, 0, ""}, + {"p-new-3", 4000, 100 * MiB, 0, ""}, }, scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 3}, expectedFinalScaleUp: groupSizeChange{groupName: "ng2", sizeChange: 1}, @@ -202,17 +200,17 @@ func TestScaleUpCapToMaxTotalNodesLimitWithNotAutoscaledGroup(t *testing.T) { options.MaxNodesTotal = 3 config := &scaleTestConfig{ nodes: []nodeConfig{ - {"n1", 2000, 100 * MB, 0, true, ""}, - {"n2", 4000, 1000 * MB, 0, true, "ng2"}, + {"n1", 2000, 100 * MiB, 0, true, ""}, + {"n2", 4000, 1000 * MiB, 0, true, "ng2"}, }, pods: []podConfig{ {"p1", 1000, 0, 0, "n1"}, {"p2", 3000, 0, 0, "n2"}, }, extraPods: []podConfig{ - {"p-new-1", 4000, 100 * MB, 0, ""}, - {"p-new-2", 4000, 100 * MB, 0, ""}, - {"p-new-3", 4000, 100 * MB, 0, ""}, + {"p-new-1", 4000, 100 * MiB, 0, ""}, + {"p-new-2", 4000, 100 * MiB, 0, ""}, + {"p-new-3", 4000, 100 * MiB, 0, ""}, }, scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 3}, expectedFinalScaleUp: groupSizeChange{groupName: "ng2", sizeChange: 1}, @@ -227,15 +225,15 @@ func TestWillConsiderGpuAndStandardPoolForPodWhichDoesNotRequireGpu(t *testing.T options.MaxNodesTotal = 100 config := &scaleTestConfig{ nodes: []nodeConfig{ - {"gpu-node-1", 2000, 1000 * MB, 1, true, "gpu-pool"}, - {"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"}, + {"gpu-node-1", 2000, 1000 * MiB, 1, true, "gpu-pool"}, + {"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"}, }, pods: []podConfig{ - {"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-node-1"}, - {"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"}, + {"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-node-1"}, + {"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"}, }, extraPods: []podConfig{ - {"extra-std-pod", 2000, 1000 * MB, 0, ""}, + {"extra-std-pod", 2000, 1000 * MiB, 0, ""}, }, expectedScaleUpOptions: []groupSizeChange{ {groupName: "std-pool", sizeChange: 1}, @@ -254,15 +252,15 @@ func TestWillConsiderOnlyGpuPoolForPodWhichDoesRequiresGpu(t *testing.T) { options.MaxNodesTotal = 100 config := &scaleTestConfig{ nodes: []nodeConfig{ - {"gpu-node-1", 2000, 1000 * MB, 1, true, "gpu-pool"}, - {"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"}, + {"gpu-node-1", 2000, 1000 * MiB, 1, true, "gpu-pool"}, + {"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"}, }, pods: []podConfig{ - {"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-node-1"}, - {"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"}, + {"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-node-1"}, + {"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"}, }, extraPods: []podConfig{ - {"extra-gpu-pod", 2000, 1000 * MB, 1, ""}, + {"extra-gpu-pod", 2000, 1000 * MiB, 1, ""}, }, expectedScaleUpOptions: []groupSizeChange{ {groupName: "gpu-pool", sizeChange: 1}, @@ -280,21 
+278,21 @@ func TestWillConsiderAllPoolsWhichFitTwoPodsRequiringGpus(t *testing.T) { options.MaxNodesTotal = 100 config := &scaleTestConfig{ nodes: []nodeConfig{ - {"gpu-1-node-1", 2000, 1000 * MB, 1, true, "gpu-1-pool"}, - {"gpu-2-node-1", 2000, 1000 * MB, 2, true, "gpu-2-pool"}, - {"gpu-4-node-1", 2000, 1000 * MB, 4, true, "gpu-4-pool"}, - {"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"}, + {"gpu-1-node-1", 2000, 1000 * MiB, 1, true, "gpu-1-pool"}, + {"gpu-2-node-1", 2000, 1000 * MiB, 2, true, "gpu-2-pool"}, + {"gpu-4-node-1", 2000, 1000 * MiB, 4, true, "gpu-4-pool"}, + {"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"}, }, pods: []podConfig{ - {"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-1-node-1"}, - {"gpu-pod-2", 2000, 1000 * MB, 2, "gpu-2-node-1"}, - {"gpu-pod-3", 2000, 1000 * MB, 4, "gpu-4-node-1"}, - {"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"}, + {"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-1-node-1"}, + {"gpu-pod-2", 2000, 1000 * MiB, 2, "gpu-2-node-1"}, + {"gpu-pod-3", 2000, 1000 * MiB, 4, "gpu-4-node-1"}, + {"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"}, }, extraPods: []podConfig{ - {"extra-gpu-pod-1", 1, 1 * MB, 1, ""}, // CPU and mem negligible - {"extra-gpu-pod-2", 1, 1 * MB, 1, ""}, // CPU and mem negligible - {"extra-gpu-pod-3", 1, 1 * MB, 1, ""}, // CPU and mem negligible + {"extra-gpu-pod-1", 1, 1 * MiB, 1, ""}, // CPU and mem negligible + {"extra-gpu-pod-2", 1, 1 * MiB, 1, ""}, // CPU and mem negligible + {"extra-gpu-pod-3", 1, 1 * MiB, 1, ""}, // CPU and mem negligible }, expectedScaleUpOptions: []groupSizeChange{ {groupName: "gpu-1-pool", sizeChange: 3}, diff --git a/cluster-autoscaler/core/utils_test.go b/cluster-autoscaler/core/utils_test.go index db78d72916dd..e125c3423180 100644 --- a/cluster-autoscaler/core/utils_test.go +++ b/cluster-autoscaler/core/utils_test.go @@ -43,6 +43,8 @@ import ( schedulercache "k8s.io/kubernetes/pkg/scheduler/cache" ) +const MiB = 1024 * 1024 + func TestPodSchedulableMap(t *testing.T) { rc1 := apiv1.ReplicationController{ ObjectMeta: metav1.ObjectMeta{ @@ -661,19 +663,19 @@ func TestConfigurePredicateCheckerForLoop(t *testing.T) { } func TestGetNodeResource(t *testing.T) { - node := BuildTestNode("n1", 1000, 2*MB) + node := BuildTestNode("n1", 1000, 2*MiB) cores := getNodeResource(node, apiv1.ResourceCPU) assert.Equal(t, int64(1), cores) memory := getNodeResource(node, apiv1.ResourceMemory) - assert.Equal(t, int64(2*MB), memory) + assert.Equal(t, int64(2*MiB), memory) unknownResourceValue := getNodeResource(node, "unknown resource") assert.Equal(t, int64(0), unknownResourceValue) // if we have no resources in capacity we expect getNodeResource to return 0 - nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MB) + nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MiB) nodeWithMissingCapacity.Status.Capacity = apiv1.ResourceList{} cores = getNodeResource(nodeWithMissingCapacity, apiv1.ResourceCPU) @@ -683,7 +685,7 @@ func TestGetNodeResource(t *testing.T) { assert.Equal(t, int64(0), memory) // if we have negative values in resources we expect getNodeResource to return 0 - nodeWithNegativeCapacity := BuildTestNode("n1", -1000, -2*MB) + nodeWithNegativeCapacity := BuildTestNode("n1", -1000, -2*MiB) nodeWithNegativeCapacity.Status.Capacity = apiv1.ResourceList{} cores = getNodeResource(nodeWithNegativeCapacity, apiv1.ResourceCPU) @@ -695,14 +697,14 @@ func TestGetNodeResource(t *testing.T) { } func TestGetNodeCoresAndMemory(t *testing.T) { - node := BuildTestNode("n1", 2000, 2048*MB) + node := BuildTestNode("n1", 2000, 2048*MiB) 
cores, memory := getNodeCoresAndMemory(node) assert.Equal(t, int64(2), cores) - assert.Equal(t, int64(2048*MB), memory) + assert.Equal(t, int64(2048*MiB), memory) // if we have no cpu/memory defined in capacity we expect getNodeCoresAndMemory to return 0s - nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MB) + nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MiB) nodeWithMissingCapacity.Status.Capacity = apiv1.ResourceList{} cores, memory = getNodeCoresAndMemory(nodeWithMissingCapacity) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index ce5d2920c2d5..87b24d621462 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -174,8 +174,8 @@ func createAutoscalingOptions() config.AutoscalingOptions { klog.Fatalf("Failed to parse flags: %v", err) } // Convert memory limits to bytes. - minMemoryTotal = minMemoryTotal * units.Gigabyte - maxMemoryTotal = maxMemoryTotal * units.Gigabyte + minMemoryTotal = minMemoryTotal * units.GiB + maxMemoryTotal = maxMemoryTotal * units.GiB parsedGpuTotal, err := parseMultipleGpuLimits(*gpuTotal) if err != nil { diff --git a/cluster-autoscaler/utils/units/units.go b/cluster-autoscaler/utils/units/units.go index 16a04e183e5c..9dc0b80676b1 100644 --- a/cluster-autoscaler/utils/units/units.go +++ b/cluster-autoscaler/utils/units/units.go @@ -17,6 +17,12 @@ limitations under the License. package units const ( - // Gigabyte is 2^30 bytes. - Gigabyte = 1024 * 1024 * 1024 + // GB - gigabyte (10^9 bytes) + GB = 1000 * 1000 * 1000 + // GiB - gibibyte (2^30 bytes) + GiB = 1024 * 1024 * 1024 + // MB - megabyte (10^6 bytes) + MB = 1000 * 1000 + // MiB - mebibyte (2^20 bytes) + MiB = 1024 * 1024 )
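For reference, a standalone sketch of the memory accounting this patch introduces in cloudprovider/gce/templates.go: physical memory minus the kernel reservation gives node capacity, and capacity minus kube-reserved and the hard-coded 100Mi eviction threshold gives allocatable. The constants mirror the ones added by the patch; the 4 GiB machine size and 256 MiB kube-reserved value are made up for illustration and are not taken from the patch or its tests:

```go
package main

import "fmt"

// Constants mirroring the values added in this patch
// (cloudprovider/gce/templates.go and utils/units/units.go).
const (
	MiB = 1024 * 1024
	GiB = 1024 * 1024 * 1024

	kubeletEvictionHardMemory = 100 * MiB // hard-coded kubelet evictionHard: {memory.available}
	kernelReservedRatio       = 64        // kernel reserves roughly 1/64 of physical memory
	kernelReservedMemory      = 16 * MiB
	swiotlbReservedMemory     = 64 * MiB // software IO TLB, only above the threshold below
	swiotlbThresholdMemory    = 3 * GiB
)

// kernelReserved reproduces the arithmetic of calculateKernelReserved from the patch.
func kernelReserved(mem int64) int64 {
	reserved := mem / kernelReservedRatio
	reserved += kernelReservedMemory
	if mem > swiotlbThresholdMemory {
		reserved += swiotlbReservedMemory
	}
	return reserved
}

func main() {
	physical := int64(4 * GiB)       // example machine size (illustrative)
	kubeReserved := int64(256 * MiB) // example --kube-reserved memory (illustrative)

	capacity := physical - kernelReserved(physical)
	allocatable := capacity - kubeReserved - kubeletEvictionHardMemory

	fmt.Printf("capacity:    %d MiB\n", capacity/MiB)    // 4096 - 64 - 16 - 64 = 3952
	fmt.Printf("allocatable: %d MiB\n", allocatable/MiB) // 3952 - 256 - 100 = 3596
}
```

The 4 GiB case matches the expectation in TestBuildCapacityMemory above: capacity is 4*GiB - 64*MiB - kernelReservedMemory - swiotlbReservedMemory.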