From f12142f0debb52cb84bf1c56069366aa9b4fd830 Mon Sep 17 00:00:00 2001 From: Samuel Lang Date: Thu, 21 Jul 2022 06:53:04 +0200 Subject: [PATCH] permit custom device requests Karpenter is negative towards custom device requests it is unaware of, assuming those cannot be scheduled. fixes #1900 This changes the request handling to be scoped only to resource request that Karpenter is aware of and actively manages. The reasoning here is that it cannot influence those resource requests anyways, they come into existance by means of other concepts such as the device-plugin manager that even might be late bound and thus it is out of scope. --- .../provisioning/scheduling/node.go | 35 +++++++++++++++++-- .../provisioning/scheduling/suite_test.go | 26 +++----------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/pkg/controllers/provisioning/scheduling/node.go b/pkg/controllers/provisioning/scheduling/node.go index 8baf70c1029c..2e44aecdbc88 100644 --- a/pkg/controllers/provisioning/scheduling/node.go +++ b/pkg/controllers/provisioning/scheduling/node.go @@ -22,9 +22,12 @@ import ( "github.com/samber/lo" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "knative.dev/pkg/logging" "github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5" "github.com/aws/karpenter/pkg/cloudprovider" + "github.com/aws/karpenter/pkg/cloudprovider/aws/apis/v1alpha1" "github.com/aws/karpenter/pkg/scheduling" "github.com/aws/karpenter/pkg/utils/resources" "github.com/aws/karpenter/pkg/utils/sets" @@ -44,6 +47,33 @@ type Node struct { var nodeID int64 +// TODO: deduplicate with unexported func (*InstanceType).computeResources +func getWellKnownRequests() v1.ResourceList { + return v1.ResourceList{ + v1.ResourceCPU: resource.Quantity{}, + v1.ResourceMemory: resource.Quantity{}, + v1.ResourceEphemeralStorage: resource.Quantity{}, + v1.ResourcePods: resource.Quantity{}, + v1alpha1.ResourceAWSPodENI: resource.Quantity{}, + v1alpha1.ResourceNVIDIAGPU: resource.Quantity{}, + v1alpha1.ResourceAMDGPU: resource.Quantity{}, + v1alpha1.ResourceAWSNeuron: resource.Quantity{}, + } +} + +func filterWellKnownRequests(ctx context.Context, podResources v1.ResourceList) (filteredReq v1.ResourceList) { + wkReq := getWellKnownRequests() + filteredReq = make(v1.ResourceList, len(wkReq)) + for k, v := range podResources { + if _, ok := wkReq[k]; ok { + filteredReq[k] = v + } else { + logging.FromContext(ctx).With(k, v).Info("Ignoring custom pod request") + } + } + return +} + func NewNode(nodeTemplate *scheduling.NodeTemplate, topology *Topology, daemonResources v1.ResourceList, instanceTypes []cloudprovider.InstanceType) *Node { // Copy the template, and add hostname hostname := fmt.Sprintf("hostname-placeholder-%04d", atomic.AddInt64(&nodeID, 1)) @@ -90,10 +120,11 @@ func (n *Node) Add(ctx context.Context, pod *v1.Pod) error { nodeRequirements.Add(topologyRequirements) // Check instance type combinations - requests := resources.Merge(n.requests, resources.RequestsForPods(pod)) + podRequests := filterWellKnownRequests(ctx, resources.RequestsForPods(pod)) + requests := resources.Merge(n.requests, podRequests) instanceTypes := filterInstanceTypes(n.InstanceTypeOptions, nodeRequirements, requests) if len(instanceTypes) == 0 { - return fmt.Errorf("no instance type satisfied resources %s and requirements %s", resources.String(resources.RequestsForPods(pod)), nodeRequirements) + return fmt.Errorf("no instance type satisfied resources %s and requirements %s", resources.String(podRequests), nodeRequirements) } // Update node diff --git a/pkg/controllers/provisioning/scheduling/suite_test.go b/pkg/controllers/provisioning/scheduling/suite_test.go index f545f65d0c64..e42872f440c4 100644 --- a/pkg/controllers/provisioning/scheduling/suite_test.go +++ b/pkg/controllers/provisioning/scheduling/suite_test.go @@ -2912,8 +2912,8 @@ var _ = Describe("Instance Type Compatibility", func() { }) It("should launch pods with resources that aren't on any single instance type on different instances", func() { cloudProv.InstanceTypes = fake.InstanceTypes(5) - const fakeGPU1 = "karpenter.sh/super-great-gpu" - const fakeGPU2 = "karpenter.sh/even-better-gpu" + fakeGPU1 := v1alpha1.ResourceNVIDIAGPU + fakeGPU2 := v1alpha1.ResourceAMDGPU cloudProv.InstanceTypes[0].Resources()[fakeGPU1] = resource.MustParse("25") cloudProv.InstanceTypes[1].Resources()[fakeGPU2] = resource.MustParse("25") @@ -2936,24 +2936,6 @@ var _ = Describe("Instance Type Compatibility", func() { } Expect(nodeNames.Len()).To(Equal(2)) }) - It("should fail to schedule a pod with resources requests that aren't on a single instance type", func() { - cloudProv.InstanceTypes = fake.InstanceTypes(5) - const fakeGPU1 = "karpenter.sh/super-great-gpu" - const fakeGPU2 = "karpenter.sh/even-better-gpu" - cloudProv.InstanceTypes[0].Resources()[fakeGPU1] = resource.MustParse("25") - cloudProv.InstanceTypes[1].Resources()[fakeGPU2] = resource.MustParse("25") - - ExpectApplied(ctx, env.Client, provisioner) - pods := ExpectProvisioned(ctx, env.Client, controller, - test.UnschedulablePod(test.PodOptions{ - ResourceRequirements: v1.ResourceRequirements{ - Limits: v1.ResourceList{ - fakeGPU1: resource.MustParse("1"), - fakeGPU2: resource.MustParse("1")}, - }, - })) - ExpectNotScheduled(ctx, env.Client, pods[0]) - }) Context("Provider Specific Labels", func() { It("should filter instance types that match labels", func() { cloudProv.InstanceTypes = fake.InstanceTypes(5) @@ -3288,8 +3270,8 @@ var _ = Describe("Binpacking", func() { pod := ExpectProvisioned(ctx, env.Client, controller, test.UnschedulablePod(test.PodOptions{ ResourceRequirements: v1.ResourceRequirements{ - Requests: v1.ResourceList{"foo.com/weird-resources": resource.MustParse("0")}, - Limits: v1.ResourceList{"foo.com/weird-resources": resource.MustParse("0")}, + Requests: v1.ResourceList{v1alpha1.ResourceAWSNeuron: resource.MustParse("0")}, + Limits: v1.ResourceList{v1alpha1.ResourceAWSNeuron: resource.MustParse("0")}, }, })) // requesting a resource of quantity zero of a type unsupported by any instance is fine