Skip to content

Commit

Permalink
Account for kernel reserved memory in capacity calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
jkaniuk committed Feb 1, 2019
1 parent 2149e1b commit 3d35b87
Show file tree
Hide file tree
Showing 15 changed files with 423 additions and 288 deletions.
5 changes: 3 additions & 2 deletions cluster-autoscaler/cloudprovider/gce/gce_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -478,7 +479,7 @@ func (m *gceManagerImpl) getCpuAndMemoryForMachineType(machineType string, zone
}
m.cache.AddMachineToCache(machineType, zone, machine)
}
return machine.GuestCpus, machine.MemoryMb * bytesPerMB, nil
return machine.GuestCpus, machine.MemoryMb * units.MiB, nil
}

func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
Expand All @@ -492,6 +493,6 @@ func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
return 0, 0, fmt.Errorf("failed to parse all params in %s", machineType)
}
// Mb to bytes
mem = mem * bytesPerMB
mem = mem * units.MiB
return
}
11 changes: 6 additions & 5 deletions cluster-autoscaler/cloudprovider/gce/gce_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"time"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"

. "k8s.io/autoscaler/cluster-autoscaler/utils/test"

Expand Down Expand Up @@ -1092,29 +1093,29 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) {
cpu, mem, err := g.getCpuAndMemoryForMachineType("custom-8-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(8), cpu)
assert.Equal(t, int64(2*bytesPerMB), mem)
assert.Equal(t, int64(2*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)

// Standard machine type found in cache.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-1", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(1), cpu)
assert.Equal(t, int64(1*bytesPerMB), mem)
assert.Equal(t, int64(1*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)

// Standard machine type not found in cache.
server.On("handle", "/project1/zones/"+zoneB+"/machineTypes/n1-standard-2").Return(getMachineTypeResponse).Once()
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)

// Standard machine type cached.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)

// Standard machine type not found in the zone.
Expand All @@ -1129,7 +1130,7 @@ func TestParseCustomMachineType(t *testing.T) {
cpu, mem, err := parseCustomMachineType("custom-2-2816")
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(2816*bytesPerMB), mem)
assert.Equal(t, int64(2816*units.MiB), mem)
cpu, mem, err = parseCustomMachineType("other-a2-2816")
assert.Error(t, err)
cpu, mem, err = parseCustomMachineType("other-2-2816")
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/gce/gce_price_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func getBasePrice(resources apiv1.ResourceList, startTime time.Time, endTime tim
cpu := resources[apiv1.ResourceCPU]
mem := resources[apiv1.ResourceMemory]
price += float64(cpu.MilliValue()) / 1000.0 * cpuPricePerHour * hours
price += float64(mem.Value()) / float64(units.Gigabyte) * memoryPricePerHourPerGb * hours
price += float64(mem.Value()) / float64(units.GiB) * memoryPricePerHourPerGb * hours
return price
}

Expand Down
50 changes: 35 additions & 15 deletions cluster-autoscaler/cloudprovider/gce/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,33 @@ import (
"strings"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"

gce "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"

"github.com/ghodss/yaml"
"k8s.io/klog"
)

const (
mbPerGB = 1000
bytesPerMB = 1000 * 1000
millicoresPerCore = 1000
// Kubelet "evictionHard: {memory.available}" is subtracted from
// capacity when calculating allocatable (on top of kube-reserved).
// KubeletEvictionHardMemory is subtracted from capacity
// when calculating allocatable (on top of kube-reserved).
// Equals kubelet "evictionHard: {memory.available}"
// We don't have a good place to get it from, but it has been hard-coded
// to 100Mi since at least k8s 1.4.
kubeletEvictionHardMemory = 100 * 1024 * 1024
KubeletEvictionHardMemory = 100 * units.MiB

// Kernel reserved memory is subtracted when calculating total memory.
kernelReservedRatio = 64
kernelReservedMemory = 16 * units.MiB
// Reserved memory for software IO TLB
swiotlbReservedMemory = 64 * units.MiB
swiotlbThresholdMemory = 3 * units.GiB
)

// GceTemplateBuilder builds templates for GCE nodes.
Expand All @@ -59,13 +65,14 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator
return count
}

// BuildCapacity builds a list of resource capacities for a node.
func (t *GceTemplateBuilder) BuildCapacity(machineType string, accelerators []*gce.AcceleratorConfig, zone string, cpu int64, mem int64) (apiv1.ResourceList, error) {
// BuildCapacity builds a list of resource capacities given list of hardware.
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig) (apiv1.ResourceList, error) {
capacity := apiv1.ResourceList{}
// TODO: get a real value.
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(mem, resource.DecimalSI)
memTotal := mem - calculateKernelReserved(mem)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI)

if accelerators != nil && len(accelerators) > 0 {
capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(t.getAcceleratorCount(accelerators), resource.DecimalSI)
Expand All @@ -91,20 +98,20 @@ func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.Resource
if err != nil {
return nil, err
}
if quantity, found := reserved[apiv1.ResourceMemory]; found {
reserved[apiv1.ResourceMemory] = *resource.NewQuantity(quantity.Value()+kubeletEvictionHardMemory, resource.BinarySI)
}
return t.CalculateAllocatable(capacity, reserved), nil
}

// CalculateAllocatable computes allocatable resources substracting reserved values from corresponding capacity.
// CalculateAllocatable computes allocatable resources subtracting reserved values from corresponding capacity.
func (t *GceTemplateBuilder) CalculateAllocatable(capacity, reserved apiv1.ResourceList) apiv1.ResourceList {
allocatable := apiv1.ResourceList{}
for key, value := range capacity {
quantity := *value.Copy()
if reservedQuantity, found := reserved[key]; found {
quantity.Sub(reservedQuantity)
}
if key == apiv1.ResourceMemory {
quantity.Sub(*resource.NewQuantity(KubeletEvictionHardMemory, resource.BinarySI))
}
allocatable[key] = quantity
}
return allocatable
Expand All @@ -126,7 +133,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
Labels: map[string]string{},
}

capacity, err := t.BuildCapacity(template.Properties.MachineType, template.Properties.GuestAccelerators, mig.GceRef().Zone, cpu, mem)
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -333,3 +340,16 @@ func buildTaints(kubeEnvTaints map[string]string) ([]apiv1.Taint, error) {
}
return taints, nil
}

// calculateKernelReserved computes how much memory Linux kernel will reserve.
// TODO: account for crashkernel reservation on RHEL / CentOS
func calculateKernelReserved(mem int64) int64 {
// Account for memory reserved by kernel
reserved := int64(mem / kernelReservedRatio)
reserved += kernelReservedMemory
// Account for software IO TLB allocation if memory requires 64bit addressing
if mem > swiotlbThresholdMemory {
reserved += swiotlbReservedMemory
}
return reserved
}
Loading

0 comments on commit 3d35b87

Please sign in to comment.