Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: Override --system-reserved when scheduling and in kubelet parameter startup #2254

Merged
merged 4 commits into from
Aug 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions charts/karpenter/crds/karpenter.sh_provisioners.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ spec:
description: ContainerRuntime is the container runtime to be used
with your worker nodes.
type: string
systemReserved:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: SystemReserved contains resources reserved for OS
system daemons and kernel memory.
type: object
type: object
labels:
additionalProperties:
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/provisioning/v1alpha5/provisioner.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ type KubeletConfiguration struct {
// ContainerRuntime is the container runtime to be used with your worker nodes.
// +optional
ContainerRuntime *string `json:"containerRuntime,omitempty"`
// SystemReserved contains resources reserved for OS system daemons and kernel memory.
SystemReserved v1.ResourceList `json:"systemReserved,omitempty"`
bwagner5 marked this conversation as resolved.
Show resolved Hide resolved
}

// Provisioner is the Schema for the Provisioners API
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/provisioning/v1alpha5/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 0 additions & 5 deletions pkg/cloudprovider/aws/amifamily/al2.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/aws/aws-sdk-go/aws"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/cloudprovider"
Expand Down Expand Up @@ -90,10 +89,6 @@ func (a AL2) EphemeralBlockDevice() *string {
return aws.String("/dev/xvda")
}

func (a AL2) EphemeralBlockDeviceOverhead() resource.Quantity {
return resource.MustParse("5Gi")
}

// ENILimitedMemoryOverhead reports whether the kube-reserved memory overhead
// for this AMI family is sized from the ENI-limited pod count rather than the
// instance type's pod capacity. AL2 returns true.
func (a AL2) ENILimitedMemoryOverhead() bool {
return true
}
5 changes: 5 additions & 0 deletions pkg/cloudprovider/aws/amifamily/bootstrap/bottlerocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"encoding/base64"
"fmt"

"github.com/aws/karpenter/pkg/utils/resources"

"github.com/aws/aws-sdk-go/aws"
)

Expand All @@ -40,6 +42,9 @@ func (b Bottlerocket) Script() (string, error) {
if b.KubeletConfig != nil && len(b.KubeletConfig.ClusterDNS) > 0 {
s.Settings.Kubernetes.ClusterDNSIP = &b.KubeletConfig.ClusterDNS[0]
}
if b.KubeletConfig != nil {
s.Settings.Kubernetes.SystemReserved = resources.StringMap(b.KubeletConfig.SystemReserved)
}
if !b.AWSENILimitedPodDensity {
s.Settings.Kubernetes.MaxPods = aws.Int(110)
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/cloudprovider/aws/amifamily/bootstrap/eksbootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ func (e EKS) Script() (string, error) {
userData.WriteString(" \\\n--use-max-pods false")
kubeletExtraArgs += " --max-pods=110"
}
if e.KubeletConfig != nil {
kubeletExtraArgs += e.systemReservedArg()
}
if e.ContainerRuntime != "" {
userData.WriteString(fmt.Sprintf(" \\\n--container-runtime %s", e.ContainerRuntime))
}
Expand Down Expand Up @@ -107,6 +110,21 @@ func (e EKS) nodeLabelArg() string {
return fmt.Sprintf("%s%s", nodeLabelArg, strings.Join(labelStrings, ","))
}

// systemReservedArg renders the kubelet --system-reserved flag from the
// SystemReserved resource list of the kubelet configuration.
//
// The result carries a leading space (" --system-reserved=k1=v1,k2=v2") so the
// caller can append it directly to the other kubelet arguments. It returns the
// empty string when there is no kubelet configuration or no reserved resources.
func (e EKS) systemReservedArg() string {
	// Guard here as well so the helper is safe to call without a prior nil check.
	if e.KubeletConfig == nil || len(e.KubeletConfig.SystemReserved) == 0 {
		return ""
	}
	args := make([]string, 0, len(e.KubeletConfig.SystemReserved))
	for k, v := range e.KubeletConfig.SystemReserved {
		args = append(args, fmt.Sprintf("%v=%v", k.String(), v.String()))
	}
	// Map iteration order is unspecified in Go, so order the entries to make the
	// generated argument identical across calls for the same configuration.
	// The list holds only a handful of resources; a small insertion sort is enough.
	for i := 1; i < len(args); i++ {
		for j := i; j > 0 && args[j] < args[j-1]; j-- {
			args[j], args[j-1] = args[j-1], args[j]
		}
	}
	return " --system-reserved=" + strings.Join(args, ",")
}

func (e EKS) mergeCustomUserData(userData *bytes.Buffer) (*bytes.Buffer, error) {
var outputBuffer bytes.Buffer
writer := multipart.NewWriter(&outputBuffer)
Expand Down
4 changes: 0 additions & 4 deletions pkg/cloudprovider/aws/amifamily/bottlerocket.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,6 @@ func (b Bottlerocket) EphemeralBlockDevice() *string {
return aws.String("/dev/xvdb")
}

func (b Bottlerocket) EphemeralBlockDeviceOverhead() resource.Quantity {
return resource.MustParse("5Gi")
}

// ENILimitedMemoryOverhead reports whether the kube-reserved memory overhead
// for this AMI family is sized from the ENI-limited pod count rather than the
// instance type's pod capacity. Bottlerocket returns false.
func (b Bottlerocket) ENILimitedMemoryOverhead() bool {
return false
}
5 changes: 0 additions & 5 deletions pkg/cloudprovider/aws/amifamily/custom.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package amifamily

import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/cloudprovider"
Expand Down Expand Up @@ -53,10 +52,6 @@ func (c Custom) EphemeralBlockDevice() *string {
return nil
}

func (c Custom) EphemeralBlockDeviceOverhead() resource.Quantity {
return resource.MustParse("5Gi")
}

// ENILimitedMemoryOverhead reports whether the kube-reserved memory overhead
// for this AMI family is sized from the ENI-limited pod count rather than the
// instance type's pod capacity. Custom returns true.
func (c Custom) ENILimitedMemoryOverhead() bool {
return true
}
1 change: 0 additions & 1 deletion pkg/cloudprovider/aws/amifamily/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ type AMIFamily interface {
DefaultBlockDeviceMappings() []*v1alpha1.BlockDeviceMapping
DefaultMetadataOptions() *v1alpha1.MetadataOptions
EphemeralBlockDevice() *string
EphemeralBlockDeviceOverhead() resource.Quantity
ENILimitedMemoryOverhead() bool
}

Expand Down
5 changes: 0 additions & 5 deletions pkg/cloudprovider/aws/amifamily/ubuntu.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (

"github.com/aws/aws-sdk-go/aws"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/cloudprovider"
Expand Down Expand Up @@ -64,10 +63,6 @@ func (u Ubuntu) EphemeralBlockDevice() *string {
return aws.String("/dev/sda1")
}

func (u Ubuntu) EphemeralBlockDeviceOverhead() resource.Quantity {
return resource.MustParse("5Gi")
}

// ENILimitedMemoryOverhead reports whether the kube-reserved memory overhead
// for this AMI family is sized from the ENI-limited pod count rather than the
// instance type's pod capacity. Ubuntu returns true.
func (u Ubuntu) ENILimitedMemoryOverhead() bool {
return true
}
2 changes: 1 addition & 1 deletion pkg/cloudprovider/aws/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (c *CloudProvider) GetInstanceTypes(ctx context.Context, provisioner *v1alp
if err != nil {
return nil, err
}
instanceTypes, err := c.instanceTypeProvider.Get(ctx, aws)
instanceTypes, err := c.instanceTypeProvider.Get(ctx, aws, provisioner.Spec.KubeletConfiguration)
if err != nil {
return nil, err
}
Expand Down
84 changes: 55 additions & 29 deletions pkg/cloudprovider/aws/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ type InstanceType struct {
price float64
}

func NewInstanceType(ctx context.Context, info *ec2.InstanceTypeInfo, price float64, provider *v1alpha1.AWS, offerings []cloudprovider.Offering) *InstanceType {
func NewInstanceType(ctx context.Context, info *ec2.InstanceTypeInfo, kc *v1alpha5.KubeletConfiguration, price float64, provider *v1alpha1.AWS, offerings []cloudprovider.Offering) *InstanceType {
instanceType := &InstanceType{
InstanceTypeInfo: info,
provider: provider,
Expand All @@ -66,7 +66,7 @@ func NewInstanceType(ctx context.Context, info *ec2.InstanceTypeInfo, price floa
}
// Precompute to minimize memory/compute overhead
instanceType.resources = instanceType.computeResources(injection.GetOptions(ctx).AWSEnablePodENI)
instanceType.overhead = instanceType.computeOverhead(injection.GetOptions(ctx).VMMemoryOverhead)
instanceType.overhead = instanceType.computeOverhead(injection.GetOptions(ctx).VMMemoryOverhead, kc)
instanceType.requirements = instanceType.computeRequirements()
return instanceType
}
Expand Down Expand Up @@ -248,30 +248,51 @@ func (i *InstanceType) awsNeurons() *resource.Quantity {
return resources.Quantity(fmt.Sprint(count))
}

func (i *InstanceType) computeOverhead(vmMemOverhead float64) v1.ResourceList {
memory := i.memory()
func (i *InstanceType) computeOverhead(vmMemOverhead float64, kc *v1alpha5.KubeletConfiguration) v1.ResourceList {
pods := i.pods()
amiFamily := amifamily.GetAMIFamily(i.provider.AMIFamily, &amifamily.Options{})
memoryOverheadPods := pods.Value()
podsQuantity := pods.Value()
if amiFamily.ENILimitedMemoryOverhead() {
memoryOverheadPods = i.eniLimitedPods()
podsQuantity = i.eniLimitedPods()
}

overhead := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(
100, // system-reserved
resource.DecimalSI),
v1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dMi",
// vm-overhead
(int64(math.Ceil(float64(memory.Value())*vmMemOverhead/1024/1024)))+
// kube-reserved
((11*memoryOverheadPods)+255)+
// system-reserved
100+
// eviction threshold https://github.com/kubernetes/kubernetes/blob/ea0764452222146c47ec826977f49d7001b0ea8c/pkg/kubelet/apis/config/v1beta1/defaults_linux.go#L23
100,
)),
v1.ResourceEphemeralStorage: amiFamily.EphemeralBlockDeviceOverhead(),
srr := i.systemReservedResources(kc)
krr := i.kubeReservedResources(podsQuantity)
misc := i.miscResources(vmMemOverhead)
overhead := resources.Merge(srr, krr, misc)

return overhead
}

// eniLimitedPods returns the pod count derived from the instance type's
// network interfaces, using the formula:
// max number of ENIs * (IPv4 Addresses per ENI -1) + 2
// https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt#L20
func (i *InstanceType) eniLimitedPods() int64 {
	network := i.NetworkInfo
	interfaces := *network.MaximumNetworkInterfaces
	addressesPerInterface := *network.Ipv4AddressesPerInterface
	return interfaces*(addressesPerInterface-1) + 2
}

// systemReservedResources returns the resources set aside for OS system
// daemons. It starts from built-in defaults for cpu, memory and
// ephemeral-storage and replaces each of them that kc.SystemReserved
// specifies; any other resource name in that list is ignored.
func (i *InstanceType) systemReservedResources(kc *v1alpha5.KubeletConfiguration) v1.ResourceList {
	// default system-reserved resources: https://kubernetes.io/docs/tasks/administer-cluster/reserve-compute-resources/#system-reserved
	reserved := v1.ResourceList{
		v1.ResourceCPU:              resource.MustParse("100m"),
		v1.ResourceMemory:           resource.MustParse("100Mi"),
		v1.ResourceEphemeralStorage: resource.MustParse("1Gi"),
	}
	if kc == nil {
		return reserved
	}

	// A lookup in a nil SystemReserved map reports the key as absent, so no
	// separate nil check on the map is needed.
	overridable := [...]v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage}
	for _, name := range overridable {
		if quantity, found := kc.SystemReserved[name]; found {
			reserved[name] = quantity
		}
	}
	return reserved
}

func (i *InstanceType) kubeReservedResources(pods int64) v1.ResourceList {
resources := v1.ResourceList{
v1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dMi", (11*pods)+255)),
v1.ResourceEphemeralStorage: resource.MustParse("1Gi"), // default kube-reserved ephemeral-storage
}
// kube-reserved Computed from
// https://github.com/bottlerocket-os/bottlerocket/pull/1388/files#diff-bba9e4e3e46203be2b12f22e0d654ebd270f0b478dd34f40c31d7aa695620f2fR611
Expand All @@ -291,19 +312,24 @@ func (i *InstanceType) computeOverhead(vmMemOverhead float64) v1.ResourceList {
if cpu < cpuRange.end {
r = float64(cpu - cpuRange.start)
}
cpuOverhead := overhead[v1.ResourceCPU]
cpuOverhead := resources.Cpu()
cpuOverhead.Add(*resource.NewMilliQuantity(int64(r*cpuRange.percentage), resource.DecimalSI))
overhead[v1.ResourceCPU] = cpuOverhead
resources[v1.ResourceCPU] = *cpuOverhead
}
}
return overhead
return resources
}

// The number of pods per node is calculated using the formula:
// max number of ENIs * (IPv4 Addresses per ENI -1) + 2
// https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt#L20
func (i *InstanceType) eniLimitedPods() int64 {
return *i.NetworkInfo.MaximumNetworkInterfaces*(*i.NetworkInfo.Ipv4AddressesPerInterface-1) + 2
// miscResources returns the memory overhead made up of the VM overhead
// (instance memory multiplied by vmMemOverhead, rounded up to whole Mi) plus a
// fixed 100Mi eviction threshold.
func (i *InstanceType) miscResources(vmMemOverhead float64) v1.ResourceList {
	memoryBytes := float64(i.memory().Value())

	// vm-overhead
	vmOverheadMi := int64(math.Ceil(memoryBytes * vmMemOverhead / 1024 / 1024))
	// eviction threshold https://github.com/kubernetes/kubernetes/blob/ea0764452222146c47ec826977f49d7001b0ea8c/pkg/kubelet/apis/config/v1beta1/defaults_linux.go#L23
	const evictionThresholdMi = 100

	overheadMi := vmOverheadMi + evictionThresholdMi
	return v1.ResourceList{
		v1.ResourceMemory: resource.MustParse(fmt.Sprintf("%dMi", overheadMi)),
	}
}

func lowerKabobCase(s string) string {
Expand Down
6 changes: 4 additions & 2 deletions pkg/cloudprovider/aws/instancetypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import (
"sync"
"time"

"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
Expand Down Expand Up @@ -67,7 +69,7 @@ func NewInstanceTypeProvider(ec2api ec2iface.EC2API, subnetProvider *SubnetProvi
}

// Get all instance type options
func (p *InstanceTypeProvider) Get(ctx context.Context, provider *v1alpha1.AWS) ([]cloudprovider.InstanceType, error) {
func (p *InstanceTypeProvider) Get(ctx context.Context, provider *v1alpha1.AWS, kc *v1alpha5.KubeletConfiguration) ([]cloudprovider.InstanceType, error) {
p.Lock()
defer p.Unlock()
// Get InstanceTypes from EC2
Expand All @@ -89,7 +91,7 @@ func (p *InstanceTypeProvider) Get(ctx context.Context, provider *v1alpha1.AWS)
// don't warn as this can occur extremely often
price = math.MaxFloat64
}
instanceType := NewInstanceType(ctx, i, price, provider, p.createOfferings(i, instanceTypeZones[instanceTypeName]))
instanceType := NewInstanceType(ctx, i, kc, price, provider, p.createOfferings(i, instanceTypeZones[instanceTypeName]))
result = append(result, instanceType)
}
return result, nil
Expand Down
Loading