From 55631222a4be14b0ccb3acd385accee4e9cc5107 Mon Sep 17 00:00:00 2001 From: Ellis Tarn Date: Mon, 25 Jan 2021 15:49:53 -0800 Subject: [PATCH] Provisioner skeleton code --- .../{capacity_provisioner.go => capacity.go} | 12 ++-- pkg/cloudprovider/aws/factory.go | 4 +- .../fake/{provisioner.go => capacity.go} | 4 +- pkg/cloudprovider/fake/factory.go | 4 +- pkg/cloudprovider/types.go | 50 +++++++------- .../v1alpha1/allocation/greedyallocator.go | 69 +++++++++++++++++++ .../provisioner/v1alpha1/allocation/types.go | 22 ++++++ .../provisioner/v1alpha1/controller.go | 22 +++++- .../v1alpha1/reallocation/types.go | 25 +++++++ .../provisioner/v1alpha1/scheduling/types.go | 23 +++++++ 10 files changed, 194 insertions(+), 41 deletions(-) rename pkg/cloudprovider/aws/{capacity_provisioner.go => capacity.go} (86%) rename pkg/cloudprovider/fake/{provisioner.go => capacity.go} (84%) create mode 100644 pkg/controllers/provisioner/v1alpha1/allocation/greedyallocator.go create mode 100644 pkg/controllers/provisioner/v1alpha1/allocation/types.go create mode 100644 pkg/controllers/provisioner/v1alpha1/reallocation/types.go create mode 100644 pkg/controllers/provisioner/v1alpha1/scheduling/types.go diff --git a/pkg/cloudprovider/aws/capacity_provisioner.go b/pkg/cloudprovider/aws/capacity.go similarity index 86% rename from pkg/cloudprovider/aws/capacity_provisioner.go rename to pkg/cloudprovider/aws/capacity.go index ca2fb9287a97..db8fbcd2005c 100644 --- a/pkg/cloudprovider/aws/capacity_provisioner.go +++ b/pkg/cloudprovider/aws/capacity.go @@ -23,17 +23,17 @@ import ( "github.com/awslabs/karpenter/pkg/cloudprovider" ) -type CapacityProvisioner struct { +type Capacity struct { ec2Iface ec2iface.EC2API } -// NewCapacityProvisioner lets user provision nodes in AWS -func NewCapacityProvisioner(client ec2iface.EC2API) *CapacityProvisioner { - return &CapacityProvisioner{ec2Iface: client} +// NewCapacity constructs a Capacity client for AWS +func NewCapacity(client ec2iface.EC2API) *Capacity { + return &Capacity{ec2Iface: client} } -// Provision accepts desired capacity and contraints for provisioning -func (cp *CapacityProvisioner) Provision(context.Context, *cloudprovider.CapacityConstraints) error { +// Create a set of nodes given the constraints +func (cp *Capacity) Create(context.Context, cloudprovider.CapacityConstraints) error { // Convert contraints to the Node types and select the launch template // TODO diff --git a/pkg/cloudprovider/aws/factory.go b/pkg/cloudprovider/aws/factory.go index fae20511a376..5a9cf956b73a 100644 --- a/pkg/cloudprovider/aws/factory.go +++ b/pkg/cloudprovider/aws/factory.go @@ -72,8 +72,8 @@ func (f *Factory) QueueFor(spec *v1alpha1.QueueSpec) cloudprovider.Queue { } } -func (f *Factory) CapacityClient() cloudprovider.CapacityProvisioner { - return NewCapacityProvisioner(f.EC2Client) +func (f *Factory) Capacity() cloudprovider.Capacity { + return NewCapacity(f.EC2Client) } func withRegion(sess *session.Session) *session.Session { diff --git a/pkg/cloudprovider/fake/provisioner.go b/pkg/cloudprovider/fake/capacity.go similarity index 84% rename from pkg/cloudprovider/fake/provisioner.go rename to pkg/cloudprovider/fake/capacity.go index ca38899207c4..ef626a366baa 100644 --- a/pkg/cloudprovider/fake/provisioner.go +++ b/pkg/cloudprovider/fake/capacity.go @@ -20,9 +20,9 @@ import ( "github.com/awslabs/karpenter/pkg/cloudprovider" ) -type Provisioner struct { +type Capacity struct { } -func (p *Provisioner) Provision(context.Context, *cloudprovider.CapacityConstraints) error { +func (c *Capacity) Create(context.Context, cloudprovider.CapacityConstraints) error { return nil } diff --git a/pkg/cloudprovider/fake/factory.go b/pkg/cloudprovider/fake/factory.go index 2d839f542310..3be292e22ad3 100644 --- a/pkg/cloudprovider/fake/factory.go +++ b/pkg/cloudprovider/fake/factory.go @@ -63,6 +63,6 @@ func (f *Factory) NodeGroupFor(sng *v1alpha1.ScalableNodeGroupSpec) cloudprovide func (f *Factory) QueueFor(spec *v1alpha1.QueueSpec) cloudprovider.Queue { return &Queue{Id: spec.ID, WantErr: f.WantErr} } -func (f *Factory) CapacityClient() cloudprovider.CapacityProvisioner { - return &Provisioner{} +func (f *Factory) Capacity() cloudprovider.Capacity { + return &Capacity{} } diff --git a/pkg/cloudprovider/types.go b/pkg/cloudprovider/types.go index 3189d1dd7133..93429373e2f5 100644 --- a/pkg/cloudprovider/types.go +++ b/pkg/cloudprovider/types.go @@ -28,8 +28,9 @@ type Factory interface { NodeGroupFor(sng *v1alpha1.ScalableNodeGroupSpec) NodeGroup // QueueFor returns a queue for the provided spec QueueFor(queue *v1alpha1.QueueSpec) Queue - // CapacityClient returns a provisioner for the provider to create instances - CapacityClient() CapacityProvisioner + + // Capacity returns a provisioner for the provider to create instances + Capacity() Capacity } // Queue abstracts all provider specific behavior for Queues @@ -54,37 +55,34 @@ type NodeGroup interface { Stabilized() (bool, string, error) } -// CapacityProvisioner helps provision a desired capacity -// with a set of constraints in the cloud provider, -// number of instances and resource capacity can be controlled by -// setting the capacityConstraints -type CapacityProvisioner interface { - // Provision will send the request to cloud provider to provision the desired capacity. - Provision(context.Context, *CapacityConstraints) error +// Capacity provisions a set of nodes that fulfill a set of constraints. +type Capacity interface { + // Create a set of nodes to fulfill the desired capacity given constraints. + Create(context.Context, CapacityConstraints) error } -// Options are injected into cloud providers' factories -type Options struct { - Client client.Client +// CapacityConstraints lets the controller define the desired capacity, +// avalability zone, architecture for the desired nodes. +type CapacityConstraints struct { + // Zone constrains where a node can be created within a region. + Zone string + // Resources constrains the minimum capacity to provision (e.g. CPU, Memory). + Resources v1.ResourceList + // Overhead resources per node from system resources such a kubelet and daemonsets. + Overhead v1.ResourceList + // Architecture constrains the underlying hardware architecture. + Architecture *Architecture } -// Architecture for the provisioned capacity +// Architecture constrains the underlying node's compilation architecture. type Architecture string const ( - Linux386 Architecture = "linux/386" - LinuxAMD64 Architecture = "linux/amd64" + Linux386 Architecture = "linux/386" + // LinuxAMD64 Architecture = "linux/amd64" TODO ) -// CapacityConstraints lets the controller define the desired capacity, -// avalability zone, architecture for the desired nodes. -type CapacityConstraints struct { - // Zone constrains where a node can be created within a region - Zone *string - // Resources constrains the minimum capacity to provision (e.g. CPU, Memory) - Resources v1.ResourceList - // NodeOverhead constrains the per node overhead of system resources - NodeOverhead v1.ResourceList - // Architecture constrains the underlying hardware architecture. - Architecture *Architecture +// Options are injected into cloud providers' factories +type Options struct { + Client client.Client } diff --git a/pkg/controllers/provisioner/v1alpha1/allocation/greedyallocator.go b/pkg/controllers/provisioner/v1alpha1/allocation/greedyallocator.go new file mode 100644 index 000000000000..10a6104d1c2b --- /dev/null +++ b/pkg/controllers/provisioner/v1alpha1/allocation/greedyallocator.go @@ -0,0 +1,69 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package allocation + +import ( + "context" + "fmt" + + "github.com/awslabs/karpenter/pkg/cloudprovider" + v1 "k8s.io/api/core/v1" +) + +var _ Allocator = &GreedyAllocator{} + +// GreedyAllocator iteratively assigns pods to scheduling groups and then creates capacity for each group. +type GreedyAllocator struct { + Capacity cloudprovider.Capacity +} + +// +func (a *GreedyAllocator) Allocate(pods []*v1.Pod) error { + // 1. Separate pods into scheduling groups + groups := a.getSchedulingGroups(pods) + + // 2. Group pods into equally schedulable constraint group + for _, group := range groups { + if err := a.Capacity.Create(context.TODO(), group.Constraints); err != nil { + return fmt.Errorf("while creating capacity with constraints %v, %w", group.Constraints, err) + } + } + return nil +} + +type SchedulingGroup struct { + Pods []*v1.Pod + Constraints cloudprovider.CapacityConstraints +} + +func (a *GreedyAllocator) getSchedulingGroups(pods []*v1.Pod) []SchedulingGroup { + groups := []SchedulingGroup{} + + for _, pod := range pods { + for _, group := range groups { + if a.matchesGroup(pod, group) { + group.Pods = append(group.Pods, pod) + break + } + } + } + + return groups +} + +// TODO +func (a *GreedyAllocator) matchesGroup(pod *v1.Pod, group SchedulingGroup) bool { + return true +} diff --git a/pkg/controllers/provisioner/v1alpha1/allocation/types.go b/pkg/controllers/provisioner/v1alpha1/allocation/types.go new file mode 100644 index 000000000000..d415f29033a9 --- /dev/null +++ b/pkg/controllers/provisioner/v1alpha1/allocation/types.go @@ -0,0 +1,22 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package allocation + +import v1 "k8s.io/api/core/v1" + +// Allocator allocates new capacity for a set of pods +type Allocator interface { + Allocate([]*v1.Pod) error +} diff --git a/pkg/controllers/provisioner/v1alpha1/controller.go b/pkg/controllers/provisioner/v1alpha1/controller.go index 7ac72f2f0c11..9d2f6475ae36 100644 --- a/pkg/controllers/provisioner/v1alpha1/controller.go +++ b/pkg/controllers/provisioner/v1alpha1/controller.go @@ -21,13 +21,15 @@ import ( "github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha1" "github.com/awslabs/karpenter/pkg/controllers" + "github.com/awslabs/karpenter/pkg/controllers/provisioner/v1alpha1/allocation" v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) // Controller for the resource type Controller struct { - Client client.Client + Client client.Client + Allocator allocation.Allocator } // For returns the resource this controller is for. @@ -45,6 +47,9 @@ func (c *Controller) Interval() time.Duration { } // Reconcile executes a control loop for the resource +// +// SKIP FOR NOW, Attempt to schedule pods on existing capacity +// SKIP FOR NOW, Attempt to schedule remaining pods by preempting existing pods func (c *Controller) Reconcile(object controllers.Object) error { _ = object.(*v1alpha1.Provisioner) @@ -54,8 +59,19 @@ func (c *Controller) Reconcile(object controllers.Object) error { return fmt.Errorf("Listing unscheduled pods, %w", err) } - // 2. SKIP FOR NOW, Attempt to schedule pods on existing capacity - // 3. SKIP FOR NOW, Attempt to schedule remaining pods by preempting existing pods + unschedulable := []*v1.Pod{} + for _, pod := range pods.Items { + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodScheduled && condition.Reason == v1.PodReasonUnschedulable { + unschedulable = append(unschedulable, &pod) + } + } + } + // 4. Attempt to schedule remaining pods by creating a set of nodes + if err := c.Allocator.Allocate(unschedulable); err != nil { + return fmt.Errorf("failed to allocate %d pods, %w", len(unschedulable), err) + } + return nil } diff --git a/pkg/controllers/provisioner/v1alpha1/reallocation/types.go b/pkg/controllers/provisioner/v1alpha1/reallocation/types.go new file mode 100644 index 000000000000..e689d23097d5 --- /dev/null +++ b/pkg/controllers/provisioner/v1alpha1/reallocation/types.go @@ -0,0 +1,25 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package reallocation + +import v1 "k8s.io/api/core/v1" + +// Reallocator evicts scheduled pods in favor of incoming pods. +type Reallocator interface { + // Reallocate takes a set of incoming pods and identifies existing pods that + // could be replaced by incoming pods. Returns incoming pods that did not + // result in a better fit. + Reallocate([]*v1.Pod) ([]*v1.Pod, error) +} diff --git a/pkg/controllers/provisioner/v1alpha1/scheduling/types.go b/pkg/controllers/provisioner/v1alpha1/scheduling/types.go new file mode 100644 index 000000000000..f4180e911a28 --- /dev/null +++ b/pkg/controllers/provisioner/v1alpha1/scheduling/types.go @@ -0,0 +1,23 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scheduling + +import v1 "k8s.io/api/core/v1" + +// Scheduler assigns incoming pods to existing nodes. +type Scheduler interface { + // Schedule a set of pods on existing nodes. + Schedule() ([]*v1.Pod, error) +}