From db14bf5284676e8383e8735dc19ebe8feb2b3833 Mon Sep 17 00:00:00 2001 From: David Dymko Date: Mon, 7 Feb 2022 09:39:14 -0500 Subject: [PATCH] vultr autoscaler implementation --- cluster-autoscaler/README.md | 2 + .../cloudprovider/builder/builder_all.go | 8 +- .../cloudprovider/builder/builder_vultr.go | 42 +++ .../cloudprovider/cloud_provider.go | 2 + cluster-autoscaler/cloudprovider/vultr/OWNERS | 4 + .../cloudprovider/vultr/README.md | 37 +++ .../cluster-autoscaler-deployment.yaml | 167 ++++++++++++ .../examples/cluster-autoscaler-secret.yaml | 13 + .../cloudprovider/vultr/govultr/nodepools.go | 120 +++++++++ .../cloudprovider/vultr/govultr/options.go | 35 +++ .../vultr/govultr/vultr_rest_client.go | 116 +++++++++ .../vultr/vultr_cloud_provider.go | 163 ++++++++++++ .../vultr/vultr_cloud_provider_test.go | 188 ++++++++++++++ .../cloudprovider/vultr/vultr_manager.go | 122 +++++++++ .../cloudprovider/vultr/vultr_manager_test.go | 102 ++++++++ .../cloudprovider/vultr/vultr_mock_test.go | 43 +++ .../cloudprovider/vultr/vultr_node_group.go | 232 +++++++++++++++++ .../vultr/vultr_node_group_test.go | 244 ++++++++++++++++++ 18 files changed, 1638 insertions(+), 2 deletions(-) create mode 100644 cluster-autoscaler/cloudprovider/builder/builder_vultr.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/OWNERS create mode 100644 cluster-autoscaler/cloudprovider/vultr/README.md create mode 100644 cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-deployment.yaml create mode 100644 cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-secret.yaml create mode 100644 cluster-autoscaler/cloudprovider/vultr/govultr/nodepools.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/govultr/options.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/govultr/vultr_rest_client.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider_test.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_manager.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_manager_test.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_mock_test.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go create mode 100644 cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go diff --git a/cluster-autoscaler/README.md b/cluster-autoscaler/README.md index 0f20bc6a4560..e1356824c08b 100644 --- a/cluster-autoscaler/README.md +++ b/cluster-autoscaler/README.md @@ -27,6 +27,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p * [Linode](./cloudprovider/linode/README.md) * [ClusterAPI](./cloudprovider/clusterapi/README.md) * [BizflyCloud](./cloudprovider/bizflycloud/README.md) +* [Vultr](./cloudprovider/vultr/README.md) # Releases @@ -167,3 +168,4 @@ Supported cloud providers: * Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md * Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md * Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md +* Vultr https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/vultr/README.md diff --git a/cluster-autoscaler/cloudprovider/builder/builder_all.go b/cluster-autoscaler/cloudprovider/builder/builder_all.go index 
190f58ec344d..86bc995f57b5 100644
--- a/cluster-autoscaler/cloudprovider/builder/builder_all.go
+++ b/cluster-autoscaler/cloudprovider/builder/builder_all.go
@@ -1,5 +1,5 @@
-//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet
-// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet
+//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet && !vultr
+// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet,!vultr
 
 /*
 Copyright 2018 The Kubernetes Authors.
@@ -39,6 +39,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/packet"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 )
 
@@ -62,6 +63,7 @@ var AvailableCloudProviders = []string{
 	cloudprovider.BizflyCloudProviderName,
 	cloudprovider.BrightboxProviderName,
 	cloudprovider.PacketProviderName,
+	cloudprovider.VultrProviderName,
 }
 
 // DefaultCloudProvider is GCE.
@@ -105,6 +107,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro
 		return ionoscloud.BuildIonosCloud(opts, do, rl)
 	case cloudprovider.LinodeProviderName:
 		return linode.BuildLinode(opts, do, rl)
+	case cloudprovider.VultrProviderName:
+		return vultr.BuildVultr(opts, do, rl)
 	}
 	return nil
 }
diff --git a/cluster-autoscaler/cloudprovider/builder/builder_vultr.go b/cluster-autoscaler/cloudprovider/builder/builder_vultr.go
new file mode 100644
index 000000000000..e6c20b814031
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/builder/builder_vultr.go
@@ -0,0 +1,42 @@
+//go:build vultr
+// +build vultr
+
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package builder
+
+import (
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
+	"k8s.io/autoscaler/cluster-autoscaler/config"
+)
+
+// AvailableCloudProviders supported by the cloud provider builder.
+var AvailableCloudProviders = []string{
+	cloudprovider.VultrProviderName,
+}
+
+// DefaultCloudProvider for vultr-only build is vultr.
+const DefaultCloudProvider = cloudprovider.VultrProviderName
+
+func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
+	switch opts.CloudProviderName {
+	case cloudprovider.VultrProviderName:
+		return vultr.BuildVultr(opts, do, rl)
+	}
+
+	return nil
+}
diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
index b2d1284062ce..0130495378fe 100644
--- a/cluster-autoscaler/cloudprovider/cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -64,6 +64,8 @@ const (
 	OVHcloudProviderName = "ovhcloud"
 	// LinodeProviderName gets the provider name of linode
 	LinodeProviderName = "linode"
+	// VultrProviderName gets the provider name of vultr
+	VultrProviderName = "vultr"
 	// PacketProviderName gets the provider name of packet
 	PacketProviderName = "packet"
 )
diff --git a/cluster-autoscaler/cloudprovider/vultr/OWNERS b/cluster-autoscaler/cloudprovider/vultr/OWNERS
new file mode 100644
index 000000000000..1248e0bafa81
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/vultr/OWNERS
@@ -0,0 +1,4 @@
+approvers:
+#- ddymko
+reviewers:
+#- ddymko
diff --git a/cluster-autoscaler/cloudprovider/vultr/README.md b/cluster-autoscaler/cloudprovider/vultr/README.md
new file mode 100644
index 000000000000..3974ccafcc2a
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/vultr/README.md
@@ -0,0 +1,37 @@
+# Cluster Autoscaler for Vultr
+
+The cluster autoscaler for Vultr scales nodes in a VKE cluster.
+
+## Vultr Kubernetes Engine
+
+Vultr Kubernetes Engine ([VKE](https://www.vultr.com/docs/vultr-kubernetes-engine/)) is the managed Kubernetes solution provided by Vultr.
+
+VKE lets users create Node Pools, i.e. groups of nodes of the same type.
+
+The size of a Node Pool can be changed at any moment. The user cannot select specific nodes to be deleted when downsizing a Node Pool; rather, VKE will randomly select nodes to delete until the defined size is reached, even if a node is unhealthy or has been manually deleted.
+
+Nodes in a Node Pool are considered disposable: they can be deleted and recreated at any moment. If a single node is deleted outside of VKE, Vultr will recreate it after a short amount of time.
+
+## Configuration
+
+It is mandatory to define the cloud configuration file `cloud-config`. You can see an example of it at [examples/cluster-autoscaler-secret.yaml](examples/cluster-autoscaler-secret.yaml).
+
+The JSON configuration file of the Vultr cloud provider supports the following values:
+
+- `cluster_id`: the ID of the VKE cluster.
+- `token`: the Vultr API key (the literal key value).
+
+Whether the autoscaler should manage a node pool, and the minimum and maximum size of that pool, are configured through the [Vultr API](https://www.vultr.com/api/#tag/kubernetes); see the example request at the end of this README.
+The autoscaler will pick up any changes and adjust accordingly.
+
+## Development
+
+Make sure you are inside the `cluster-autoscaler` path of the [autoscaler repository](https://github.com/kubernetes/autoscaler).
+
+Create the docker image:
+```
+make container
+```
+Tag the generated docker image and push it to a registry.
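+## Enabling autoscaling on a node pool
+
+As a rough illustration (the request fields below follow the public VKE API reference rather than anything defined by this provider, and the IDs are placeholders), enabling autoscaling on a pool and setting its bounds might look like:
+
+```
+curl -X PATCH "https://api.vultr.com/v2/kubernetes/clusters/<vke-id>/node-pools/<nodepool-id>" \
+  -H "Authorization: Bearer ${VULTR_API_KEY}" \
+  -H "Content-Type: application/json" \
+  --data '{"auto_scaler": true, "min_nodes": 1, "max_nodes": 5}'
+```
+
+On its next refresh the autoscaler re-lists the cluster's node pools and starts (or stops) managing the pool according to these values.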
diff --git a/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-deployment.yaml b/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-deployment.yaml new file mode 100644 index 000000000000..f72f5bba1e15 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-deployment.yaml @@ -0,0 +1,167 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler + name: cluster-autoscaler + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "namespaces" + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes", "csistoragecapacities", "csidrivers"] + verbs: ["watch", "list", "get"] + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: cluster-autoscaler +spec: 
+ replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + spec: + serviceAccountName: cluster-autoscaler + containers: + - image: k8s.gcr.io/autoscaling/cluster-autoscaler:latest + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 300Mi + requests: + cpu: 100m + memory: 300Mi + command: + - ./cluster-autoscaler + - --v=2 + - --cloud-provider=vultr + - --cloud-config=/config/cloud-config + volumeMounts: + - name: ssl-certs + mountPath: /etc/ssl/certs/ca-certificates.crt + readOnly: true + - name: cloud-config + mountPath: /config + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-certificates.crt" + - name: cloud-config + secret: + secretName: cluster-autoscaler-cloud-config \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-secret.yaml b/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-secret.yaml new file mode 100644 index 000000000000..55fb1278f285 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/examples/cluster-autoscaler-secret.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: cluster-autoscaler-cloud-config + namespace: kube-system +type: Opaque +stringData: + cloud-config: |- + { + "cluster_id": "6b8b7c8e-7314-4d17-bb0c-fcc9777230fa", + "token": "IAOBTDG35OTGH2UKCC3S6CNMDUPCN3ZNVBASQ" + } \ No newline at end of file diff --git a/cluster-autoscaler/cloudprovider/vultr/govultr/nodepools.go b/cluster-autoscaler/cloudprovider/vultr/govultr/nodepools.go new file mode 100644 index 000000000000..9462f016bc70 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/govultr/nodepools.go @@ -0,0 +1,120 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package govultr + +import ( + "context" + "fmt" + "net/http" + + "github.com/google/go-querystring/query" +) + +const vkePath = "/v2/kubernetes/clusters" + +// Nodepools interface +type Nodepools interface { + ListNodePools(ctx context.Context, vkeID string, options *ListOptions) ([]NodePool, *Meta, error) + UpdateNodePool(ctx context.Context, vkeID, nodePoolID string, updateReq *NodePoolReqUpdate) (*NodePool, error) + DeleteNodePoolInstance(ctx context.Context, vkeID, nodePoolID, nodeID string) error +} + +// NodePool represents a pool of nodes that are grouped by their label and plan type +type NodePool struct { + ID string `json:"id"` + DateCreated string `json:"date_created"` + DateUpdated string `json:"date_updated"` + Label string `json:"label"` + Plan string `json:"plan"` + Status string `json:"status"` + NodeQuantity int `json:"node_quantity"` + Tag string `json:"tag"` + Nodes []Node `json:"nodes"` + AutoScaler bool `json:"auto_scaler"` + MinNodes int `json:"min_nodes"` + MaxNodes int `json:"max_nodes"` +} + +// Node represents a node that will live within a nodepool +type Node struct { + ID string `json:"id"` + DateCreated string `json:"date_created"` + Label string `json:"label"` + Status string `json:"status"` +} + +// NodePoolReqUpdate struct used to update a node pool +type NodePoolReqUpdate struct { + NodeQuantity int `json:"node_quantity,omitempty"` + Tag string `json:"tag,omitempty"` +} + +type vkeNodePoolsBase struct { + NodePools []NodePool `json:"node_pools"` + Meta *Meta `json:"meta"` +} + +type vkeNodePoolBase struct { + NodePool *NodePool `json:"node_pool"` +} + +// ListNodePools returns all nodepools on a given VKE cluster +func (c *Client) ListNodePools(ctx context.Context, vkeID string, options *ListOptions) ([]NodePool, *Meta, error) { + req, err := c.newRequest(ctx, http.MethodGet, fmt.Sprintf("%s/%s/node-pools", vkePath, vkeID), nil) + if err != nil { + return nil, nil, err + } + + newValues, err := query.Values(options) + if err != nil { + return nil, nil, err + } + + req.URL.RawQuery = newValues.Encode() + + n := new(vkeNodePoolsBase) + if err = c.doWithContext(ctx, req, &n); err != nil { + return nil, nil, err + } + + return n.NodePools, n.Meta, nil +} + +// UpdateNodePool updates a given nodepool +func (c *Client) UpdateNodePool(ctx context.Context, vkeID, nodePoolID string, updateReq *NodePoolReqUpdate) (*NodePool, error) { + req, err := c.newRequest(ctx, http.MethodPatch, fmt.Sprintf("%s/%s/node-pools/%s", vkePath, vkeID, nodePoolID), updateReq) + if err != nil { + return nil, err + } + + np := new(vkeNodePoolBase) + if err = c.doWithContext(ctx, req, np); err != nil { + return nil, err + } + + return np.NodePool, nil +} + +// DeleteNodePoolInstance will delete a specific instance from a nodepool +func (c *Client) DeleteNodePoolInstance(ctx context.Context, vkeID, nodePoolID, nodeID string) error { + req, err := c.newRequest(ctx, http.MethodDelete, fmt.Sprintf("%s/%s/node-pools/%s/nodes/%s", vkePath, vkeID, nodePoolID, nodeID), nil) + if err != nil { + return err + } + + return c.doWithContext(ctx, req, nil) +} diff --git a/cluster-autoscaler/cloudprovider/vultr/govultr/options.go b/cluster-autoscaler/cloudprovider/vultr/govultr/options.go new file mode 100644 index 000000000000..9086df536857 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/govultr/options.go @@ -0,0 +1,35 @@ +/* +Copyright 2022 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package govultr + +// ListOptions are the available fields that can be used with pagination +type ListOptions struct { + PerPage int `url:"per_page,omitempty"` + Cursor string `url:"cursor,omitempty"` +} + +// Meta represents the available pagination information +type Meta struct { + Total int `json:"total"` + Links *Links +} + +// Links represent the next/previous cursor in your pagination calls +type Links struct { + Next string `json:"next"` + Prev string `json:"prev"` +} diff --git a/cluster-autoscaler/cloudprovider/vultr/govultr/vultr_rest_client.go b/cluster-autoscaler/cloudprovider/vultr/govultr/vultr_rest_client.go new file mode 100644 index 000000000000..52dd7cb62695 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/govultr/vultr_rest_client.go @@ -0,0 +1,116 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package govultr + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "io/ioutil" + "net/http" + "net/url" +) + +// Client that is used for HTTP requests +type Client struct { + httpClient *http.Client + baseURL *url.URL + userAgent string +} + +// NewClient returns a client struct +func NewClient(client *http.Client) *Client { + // do something better here + u, _ := url.Parse("https://api.vultr.com/v2") + return &Client{ + httpClient: client, + baseURL: u, + userAgent: "kubernetes/cluster-autoscaler", + } +} + +// SetBaseUrl sets the base URL +func (c *Client) SetBaseUrl(baseURL string) (*Client, error) { + u, err := url.Parse(baseURL) + if err != nil { + return nil, err + } + + c.baseURL = u + + return c, nil +} + +// SetUserAgent sets the user-agent for HTTP requests +func (c *Client) SetUserAgent(userAgent string) *Client { + c.userAgent = userAgent + return c +} + +func (c *Client) newRequest(ctx context.Context, method, uri string, body interface{}) (*http.Request, error) { + resolvedURL, err := c.baseURL.Parse(uri) + if err != nil { + return nil, err + } + + buf := new(bytes.Buffer) + if body != nil { + if err = json.NewEncoder(buf).Encode(body); err != nil { + return nil, err + } + } + + req, err := http.NewRequest(method, resolvedURL.String(), buf) + if err != nil { + return nil, err + } + + req.Header.Add("User-Agent", c.userAgent) + req.Header.Add("Content-Type", "application/json") + + return req, nil +} + +func (c *Client) doWithContext(ctx context.Context, r *http.Request, data interface{}) error { + req := r.WithContext(ctx) + res, err := c.httpClient.Do(req) + + if err != nil { + return err + } + + //todo handle this + defer res.Body.Close() + + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return err + } + + if res.StatusCode >= http.StatusOK && res.StatusCode <= http.StatusNoContent { + if data != nil { + if err := json.Unmarshal(body, data); err != nil { + return err + } + } + return nil + } + + //todo make into errors struct? + return errors.New(string(body)) +} diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider.go b/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider.go new file mode 100644 index 000000000000..de8bfaedbd48 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider.go @@ -0,0 +1,163 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vultr + +import ( + "fmt" + "os" + "strings" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/klog/v2" +) + +var _ cloudprovider.CloudProvider = (*vultrCloudProvider)(nil) + +const vultrProviderIDPrefix = "vultr://" + +type vultrCloudProvider struct { + manager *manager + resourceLimiter *cloudprovider.ResourceLimiter +} + +func newVultrCloudProvider(manager *manager, rl *cloudprovider.ResourceLimiter) *vultrCloudProvider { + return &vultrCloudProvider{ + manager: manager, + resourceLimiter: rl, + } +} + +// Name returns name of the cloud provider. +func (v *vultrCloudProvider) Name() string { + return cloudprovider.VultrProviderName +} + +// NodeGroups returns all node groups configured for this cloud provider. +func (v *vultrCloudProvider) NodeGroups() []cloudprovider.NodeGroup { + nodeGroups := make([]cloudprovider.NodeGroup, len(v.manager.nodeGroups)) + for i, ng := range v.manager.nodeGroups { + nodeGroups[i] = ng + } + return nodeGroups +} + +// NodeGroupForNode returns the node group for the given node, nil if the node +// should not be processed by cluster autoscaler, or non-nil error if such +// occurred. Must be implemented. +func (v *vultrCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) { + providerID := node.Spec.ProviderID + + // we want to find the pool for a specific node + for _, group := range v.manager.nodeGroups { + nodes, err := group.Nodes() + if err != nil { + return nil, err + } + + for _, node := range nodes { + if node.Id == providerID { + return group, nil + } + } + } + return nil, nil +} + +// Pricing returns pricing model for this cloud provider or error if not available. +// Implementation optional. +func (v *vultrCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider. +// Implementation optional. +func (v *vultrCloudProvider) GetAvailableMachineTypes() ([]string, error) { + return []string{}, nil +} + +// NewNodeGroup builds a theoretical node group based on the node definition +// provided. The node group is not automatically created on the cloud provider +// side. The node group is not returned by NodeGroups() until it is created. +// Implementation optional. +func (v *vultrCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) { + return nil, cloudprovider.ErrNotImplemented +} + +// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.). +func (v *vultrCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) { + return v.resourceLimiter, nil +} + +// GPULabel returns the label added to nodes with GPU resource. +func (v *vultrCloudProvider) GPULabel() string { + return "" +} + +// GetAvailableGPUTypes return all available GPU types cloud provider supports. +func (v *vultrCloudProvider) GetAvailableGPUTypes() map[string]struct{} { + return nil +} + +// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc. 
+func (v *vultrCloudProvider) Cleanup() error {
+	return nil
+}
+
+// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
+// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
+func (v *vultrCloudProvider) Refresh() error {
+	klog.V(4).Info("Refreshing node group cache")
+	return v.manager.Refresh()
+}
+
+// toProviderID returns a provider ID from the given node ID.
+func toProviderID(nodeID string) string {
+	return fmt.Sprintf("%s%s", vultrProviderIDPrefix, nodeID)
+}
+
+// toNodeID returns a node ID from the given provider ID.
+func toNodeID(providerID string) string {
+	return strings.TrimPrefix(providerID, vultrProviderIDPrefix)
+}
+
+// BuildVultr builds the Vultr cloud provider.
+func BuildVultr(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
+	if opts.CloudConfig == "" {
+		klog.Fatalf("No config file provided, please specify it via the --cloud-config flag")
+	}
+
+	configFile, err := os.Open(opts.CloudConfig)
+	if err != nil {
+		klog.Fatalf("Could not open cloud provider configuration file %q, error: %v", opts.CloudConfig, err)
+	}
+
+	defer configFile.Close()
+
+	manager, err := newManager(configFile)
+	if err != nil {
+		klog.Fatalf("Failed to create Vultr manager: %v", err)
+	}
+
+	// The cloud provider automatically uses all node pools in Vultr.
+	// This means we don't use the cloudprovider.NodeGroupDiscoveryOptions
+	// flags (which can be set via '--node-group-auto-discovery' or '--nodes').
+	return newVultrCloudProvider(manager, rl)
+}
diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider_test.go
new file mode 100644
index 000000000000..a89dfee609c7
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/vultr/vultr_cloud_provider_test.go
@@ -0,0 +1,188 @@
+/*
+Copyright 2022 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package vultr + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" +) + +func TestVultrCloudProvider_newVultrCloudProvider(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + client := &vultrClientMock{} + ctx := context.Background() + + client.On("ListNodePools", ctx, manager.clusterID, nil).Return( + []govultr.NodePool{ + { + ID: "1234", + AutoScaler: true, + MinNodes: 1, + MaxNodes: 2, + }, + { + ID: "4567", + AutoScaler: true, + MinNodes: 5, + MaxNodes: 8, + }, + { + ID: "9876", + AutoScaler: false, + MinNodes: 5, + MaxNodes: 8, + }, + }, + &govultr.Meta{}, + nil, + ).Once() + + manager.client = client + rl := &cloudprovider.ResourceLimiter{} + + _ = newVultrCloudProvider(manager, rl) + +} + +func TestVultrCloudProvider_NewNodeGroup(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + client := &vultrClientMock{} + ctx := context.Background() + + client.On("ListNodePools", ctx, manager.clusterID, nil).Return( + []govultr.NodePool{ + { + ID: "1234", + AutoScaler: true, + MinNodes: 1, + MaxNodes: 2, + }, + { + ID: "4567", + AutoScaler: true, + MinNodes: 5, + MaxNodes: 8, + }, + { + ID: "9876", + AutoScaler: false, + MinNodes: 5, + MaxNodes: 8, + }, + }, + &govultr.Meta{}, + nil, + ).Once() + + manager.client = client + rl := &cloudprovider.ResourceLimiter{} + + provider := newVultrCloudProvider(manager, rl) + err = provider.Refresh() + assert.NoError(t, err) + + nodes := provider.NodeGroups() + assert.Equal(t, len(nodes), 2, "number of nodes do not match") + +} + +func TestVultrCloudProvider_NodeGroupForNode(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + client := &vultrClientMock{} + ctx := context.Background() + + client.On("ListNodePools", ctx, manager.clusterID, nil).Return( + []govultr.NodePool{ + { + ID: "a", + AutoScaler: true, + Nodes: []govultr.Node{ + { + ID: "np-1234", + Status: "Active", + }, + }, + MinNodes: 1, + MaxNodes: 2, + }, + { + ID: "b", + AutoScaler: true, + Nodes: []govultr.Node{ + { + ID: "np-456", + Status: "Active", + }, + }, + MinNodes: 5, + MaxNodes: 8, + }, + { + ID: "c", + AutoScaler: false, + MinNodes: 5, + MaxNodes: 8, + }, + }, + &govultr.Meta{}, + nil, + ).Once() + + manager.client = client + rl := &cloudprovider.ResourceLimiter{} + + provider := newVultrCloudProvider(manager, rl) + err = provider.Refresh() + assert.NoError(t, err) + + node := &apiv1.Node{Spec: apiv1.NodeSpec{ProviderID: toProviderID("np-1234")}} + + nodeGroup, err := provider.NodeGroupForNode(node) + require.NoError(t, err) + + require.NotNil(t, nodeGroup) + require.Equal(t, nodeGroup.Id(), "a", "nodegroup IDs do not match") +} + +func TestVultrCloudProvider_Name(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + p := newVultrCloudProvider(manager, &cloudprovider.ResourceLimiter{}) + assert.Equal(t, cloudprovider.VultrProviderName, p.Name(), "provider name doesn't match") +} diff --git 
a/cluster-autoscaler/cloudprovider/vultr/vultr_manager.go b/cluster-autoscaler/cloudprovider/vultr/vultr_manager.go new file mode 100644 index 000000000000..44f28ad32b2e --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_manager.go @@ -0,0 +1,122 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vultr + +import ( + "context" + "encoding/json" + "errors" + "io" + "io/ioutil" + "net/http" + "time" + + "golang.org/x/oauth2" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" + "k8s.io/klog/v2" +) + +type vultrClient interface { + ListNodePools(ctx context.Context, vkeID string, options *govultr.ListOptions) ([]govultr.NodePool, *govultr.Meta, error) + UpdateNodePool(ctx context.Context, vkeID, nodePoolID string, updateReq *govultr.NodePoolReqUpdate) (*govultr.NodePool, error) + DeleteNodePoolInstance(ctx context.Context, vkeID, nodePoolID, nodeID string) error +} + +type manager struct { + clusterID string + client vultrClient + nodeGroups []*NodeGroup +} + +// Config is the configuration of the Vultr cloud provider +type Config struct { + ClusterID string `json:"cluster_id"` + Token string `json:"token"` +} + +func newManager(config io.Reader) (*manager, error) { + cfg := &Config{} + + if config != nil { + body, err := ioutil.ReadAll(config) + if err != nil { + return nil, err + } + + if err := json.Unmarshal(body, cfg); err != nil { + return nil, err + } + } + + //todo smarter checking to see if token is set + if cfg.Token == "" { + return nil, errors.New("empty token was supplied") + } + + if cfg.ClusterID == "" { + return nil, errors.New("empty cluster ID was supplied") + } + + tokenSource := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: cfg.Token}) + oauth2Client := &http.Client{ + Timeout: 60 * time.Second, + Transport: &oauth2.Transport{ + Source: tokenSource, + }, + } + + m := &manager{ + client: govultr.NewClient(oauth2Client), + nodeGroups: make([]*NodeGroup, 0), + clusterID: cfg.ClusterID, + } + + return m, nil +} + +func (m *manager) Refresh() error { + ctx := context.Background() + + //todo do we want to set the paging options here? 
+ nodePools, _, err := m.client.ListNodePools(ctx, m.clusterID, nil) + if err != nil { + return err + } + + var group []*NodeGroup + for _, nodePool := range nodePools { + + if !nodePool.AutoScaler { + continue + } + + klog.V(3).Infof("adding node pool: %q name with min nodes %d and max nodes %d", nodePool.Label, nodePool.MinNodes, nodePool.MaxNodes) + + np := nodePool + group = append(group, &NodeGroup{ + id: nodePool.ID, + clusterID: m.clusterID, + client: m.client, + nodePool: &np, // we had to set this as a pointer because we don't return the [] as []* + minSize: nodePool.MinNodes, + maxSize: nodePool.MaxNodes, + }) + } + + m.nodeGroups = group + return nil +} diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_manager_test.go b/cluster-autoscaler/cloudprovider/vultr/vultr_manager_test.go new file mode 100644 index 000000000000..e7c25dc3ab2a --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_manager_test.go @@ -0,0 +1,102 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vultr + +import ( + "context" + "errors" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" +) + +func TestManager_newManager(t *testing.T) { + + t.Run("basic token and cluster id", func(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + assert.Equal(t, manager.clusterID, "abc", "invalid cluster id") + }) + + t.Run("missing token", func(t *testing.T) { + config := `{"token": "", "cluster_id": "abc"}` + + _, err := newManager(strings.NewReader(config)) + assert.EqualError(t, err, errors.New("empty token was supplied").Error()) + }) + + t.Run("missing cluster id", func(t *testing.T) { + config := `{"token": "123-345", "cluster_id": ""}` + + _, err := newManager(strings.NewReader(config)) + assert.EqualError(t, err, errors.New("empty cluster ID was supplied").Error()) + }) +} + +func TestManager_Refresh(t *testing.T) { + config := `{"token": "123-456", "cluster_id": "abc"}` + + manager, err := newManager(strings.NewReader(config)) + require.NoError(t, err) + + client := &vultrClientMock{} + ctx := context.Background() + + client.On("ListNodePools", ctx, manager.clusterID, nil).Return( + []govultr.NodePool{ + { + ID: "1234", + AutoScaler: true, + MinNodes: 1, + MaxNodes: 2, + }, + { + ID: "4567", + AutoScaler: true, + MinNodes: 5, + MaxNodes: 8, + }, + { + ID: "9876", + AutoScaler: false, + MinNodes: 5, + MaxNodes: 8, + }, + }, + &govultr.Meta{}, + nil, + ).Once() + + manager.client = client + + err = manager.Refresh() + assert.NoError(t, err) + assert.Equal(t, len(manager.nodeGroups), 2, "number of nodepools do not match") + + assert.Equal(t, manager.nodeGroups[0].minSize, 1, "minimum node for first group does not match") + assert.Equal(t, manager.nodeGroups[0].MaxSize(), 2, "minimum node for first group does not match") + // + 
assert.Equal(t, manager.nodeGroups[1].minSize, 5, "minimum node for first group does not match") + assert.Equal(t, manager.nodeGroups[1].maxSize, 8, "minimum node for first group does not match") + +} diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_mock_test.go b/cluster-autoscaler/cloudprovider/vultr/vultr_mock_test.go new file mode 100644 index 000000000000..356290f0bdb3 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_mock_test.go @@ -0,0 +1,43 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vultr + +import ( + "context" + + "github.com/stretchr/testify/mock" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" +) + +type vultrClientMock struct { + mock.Mock +} + +func (v *vultrClientMock) ListNodePools(ctx context.Context, vkeID string, options *govultr.ListOptions) ([]govultr.NodePool, *govultr.Meta, error) { + args := v.Called(ctx, vkeID, nil) + return args.Get(0).([]govultr.NodePool), args.Get(1).(*govultr.Meta), args.Error(2) +} + +func (v *vultrClientMock) UpdateNodePool(ctx context.Context, vkeID, nodePoolID string, updateReq *govultr.NodePoolReqUpdate) (*govultr.NodePool, error) { + args := v.Called(ctx, vkeID, nodePoolID, updateReq) + return args.Get(0).(*govultr.NodePool), args.Error(1) +} + +func (v *vultrClientMock) DeleteNodePoolInstance(ctx context.Context, vkeID, nodePoolID, nodeID string) error { + args := v.Called(ctx, vkeID, nodePoolID, nodeID) + return args.Error(0) +} diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go new file mode 100644 index 000000000000..0b96d76b619d --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group.go @@ -0,0 +1,232 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vultr + +import ( + "context" + "errors" + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" + "k8s.io/autoscaler/cluster-autoscaler/config" + schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" +) + +const ( + vkeLabel = "vke.vultr.com" + nodeIDLabel = vkeLabel + "/node-id" +) + +// NodeGroup implements cloudprovider.NodeGroup interface. NodeGroup contains +// configuration info and functions to control a set of nodes that have the +// same capacity and set of labels. 
+type NodeGroup struct {
+	id        string
+	clusterID string
+	client    vultrClient
+	nodePool  *govultr.NodePool
+
+	minSize int
+	maxSize int
+}
+
+// MaxSize returns maximum size of the node group.
+func (n *NodeGroup) MaxSize() int {
+	return n.maxSize
+}
+
+// MinSize returns minimum size of the node group.
+func (n *NodeGroup) MinSize() int {
+	return n.minSize
+}
+
+// TargetSize returns the current target size of the node group. It is possible
+// that the number of nodes in Kubernetes is different at the moment but should
+// be equal to Size() once everything stabilizes (new nodes finish startup and
+// registration or removed nodes are deleted completely). Implementation
+// required.
+func (n *NodeGroup) TargetSize() (int, error) {
+	return n.nodePool.NodeQuantity, nil
+}
+
+// IncreaseSize increases the size of the node group. To delete a node you need
+// to explicitly name it and use DeleteNode. This function should wait until
+// node group size is updated.
+func (n *NodeGroup) IncreaseSize(delta int) error {
+	if delta <= 0 {
+		return fmt.Errorf("delta must be positive, have: %d", delta)
+	}
+
+	targetSize := n.nodePool.NodeQuantity + delta
+	if targetSize > n.MaxSize() {
+		return fmt.Errorf("size increase is too large. current: %d desired: %d max: %d",
+			n.nodePool.NodeQuantity, targetSize, n.MaxSize())
+	}
+
+	req := &govultr.NodePoolReqUpdate{NodeQuantity: targetSize}
+
+	updatedNodePool, err := n.client.UpdateNodePool(context.Background(), n.clusterID, n.id, req)
+	if err != nil {
+		return err
+	}
+
+	if updatedNodePool.NodeQuantity != targetSize {
+		return fmt.Errorf("couldn't increase size to %d (delta: %d). Current size is: %d",
+			targetSize, delta, updatedNodePool.NodeQuantity)
+	}
+
+	// update internal cache
+	n.nodePool.NodeQuantity = targetSize
+	return nil
+}
+
+// DeleteNodes deletes nodes from this node group (and also decreasing the size
+// of the node group with that). Error is returned either on failure or if the
+// given node doesn't belong to this node group. This function should wait
+// until node group size is updated. Implementation required.
+func (n *NodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
+	for _, node := range nodes {
+		nodeID, ok := node.Labels[nodeIDLabel]
+
+		//todo review this
+		if !ok {
+			// CA creates fake node objects to represent upcoming VMs that
+			// haven't registered as nodes yet. We cannot delete the node at
+			// this point.
+			return fmt.Errorf("cannot delete node %q with provider ID %q on node pool %q: node ID label %q is missing", node.Name, node.Spec.ProviderID, n.id, nodeIDLabel)
+		}
+
+		err := n.client.DeleteNodePoolInstance(context.Background(), n.clusterID, n.id, nodeID)
+		if err != nil {
+			return fmt.Errorf("deleting node failed for cluster: %q node pool: %q node: %q: %s",
+				n.clusterID, n.id, nodeID, err)
+		}
+
+		n.nodePool.NodeQuantity--
+	}
+
+	return nil
+}
+
+// DecreaseTargetSize decreases the target size of the node group. This function
+// doesn't permit to delete any existing node and can be used only to reduce the
+// request for new nodes that have not been yet fulfilled. Delta should be negative.
+// It is assumed that cloud provider will not delete the existing nodes when there
+// is an option to just decrease the target.
+func (n *NodeGroup) DecreaseTargetSize(delta int) error {
+	if delta >= 0 {
+		return fmt.Errorf("delta must be negative, have: %d", delta)
+	}
+
+	targetSize := n.nodePool.NodeQuantity + delta
+	if targetSize < n.MinSize() {
+		return fmt.Errorf("size decrease is too small. current: %d desired: %d min: %d",
+			n.nodePool.NodeQuantity, targetSize, n.MinSize())
+	}
+
+	req := &govultr.NodePoolReqUpdate{NodeQuantity: targetSize}
+	updatedNodePool, err := n.client.UpdateNodePool(context.Background(), n.clusterID, n.id, req)
+	if err != nil {
+		return err
+	}
+
+	if updatedNodePool.NodeQuantity != targetSize {
+		return fmt.Errorf("couldn't decrease size to %d (delta: %d). Current size is: %d",
+			targetSize, delta, updatedNodePool.NodeQuantity)
+	}
+
+	// update internal cache
+	n.nodePool.NodeQuantity = targetSize
+	return nil
+}
+
+// Id returns a unique identifier of the node group.
+func (n *NodeGroup) Id() string {
+	return n.id
+}
+
+// Debug returns a string containing all information regarding this node group.
+func (n *NodeGroup) Debug() string {
+	return fmt.Sprintf("node group ID: %s (min:%d max:%d)", n.Id(), n.MinSize(), n.MaxSize())
+}
+
+// Nodes returns a list of all nodes that belong to this node group. It is
+// required that Instance objects returned by this method have ID field set.
+// Other fields are optional.
+func (n *NodeGroup) Nodes() ([]cloudprovider.Instance, error) {
+	if n.nodePool == nil {
+		return nil, errors.New("node pool instance is not created")
+	}
+
+	nodes := n.nodePool.Nodes
+	instances := make([]cloudprovider.Instance, 0, len(nodes))
+	for _, nd := range nodes {
+		i := cloudprovider.Instance{
+			Id: toProviderID(nd.ID),
+		}
+
+		instances = append(instances, i)
+	}
+	return instances, nil
+}
+
+// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty
+// (as if just started) node. This will be used in scale-up simulations to
+// predict what would a new node look like if a node group was expanded. The
+// returned NodeInfo is expected to have a fully populated Node object, with
+// all of the labels, capacity and allocatable information as well as all pods
+// that are started on the node by default, using manifest (most likely only
+// kube-proxy). Implementation optional.
+func (n *NodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
+	return nil, cloudprovider.ErrNotImplemented
+}
+
+// Exist checks if the node group really exists on the cloud provider side.
+// Allows to tell the theoretical node group from the real one. Implementation
+// required.
+func (n *NodeGroup) Exist() bool {
+	return n.nodePool != nil
+}
+
+// Create creates the node group on the cloud provider side. Implementation
+// optional.
+func (n *NodeGroup) Create() (cloudprovider.NodeGroup, error) {
+	return nil, cloudprovider.ErrNotImplemented
+}
+
+// Delete deletes the node group on the cloud provider side. This will be
+// executed only for autoprovisioned node groups, once their size drops to 0.
+// Implementation optional.
+func (n *NodeGroup) Delete() error {
+	return cloudprovider.ErrNotImplemented
+}
+
+// Autoprovisioned returns true if the node group is autoprovisioned. An
+// autoprovisioned group was created by CA and can be deleted when scaled to 0.
+func (n *NodeGroup) Autoprovisioned() bool {
+	return false
+}
+
+// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular
+// NodeGroup. Returning a nil will result in using default options.
+func (n *NodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) { + return nil, cloudprovider.ErrNotImplemented +} diff --git a/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go new file mode 100644 index 000000000000..eed5d2b7af98 --- /dev/null +++ b/cluster-autoscaler/cloudprovider/vultr/vultr_node_group_test.go @@ -0,0 +1,244 @@ +/* +Copyright 2022 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vultr + +import ( + "context" + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr" +) + +func TestNodeGroup_Debug(t *testing.T) { + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: 3, + MinNodes: 1, + MaxNodes: 10, + }) + + d := ng.Debug() + exp := "node group ID: a (min:1 max:10)" + assert.Equal(t, exp, d, "debug string do not match") +} + +func TestNodeGroup_TargetSize(t *testing.T) { + nodes := 5 + + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: nodes, + }) + + size, err := ng.TargetSize() + assert.NoError(t, err) + assert.Equal(t, nodes, size, "target size is not correct") +} + +func TestNodeGroup_IncreaseSize(t *testing.T) { + client := &vultrClientMock{} + + t.Run("basic increase", func(t *testing.T) { + nodeQuant := 2 + delta := 1 + ng := testData(client, &govultr.NodePool{NodeQuantity: nodeQuant, MinNodes: 2, MaxNodes: 3}) + + newQaunt := nodeQuant + delta + client.On("UpdateNodePool", context.Background(), ng.clusterID, ng.id, + &govultr.NodePoolReqUpdate{NodeQuantity: newQaunt}).Return(&govultr.NodePool{NodeQuantity: newQaunt}, nil).Once() + + err := ng.IncreaseSize(delta) + assert.NoError(t, err) + }) + + t.Run("negative increase", func(t *testing.T) { + numberOfNodes := 3 + delta := -1 + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: numberOfNodes, + }) + err := ng.IncreaseSize(delta) + + exp := fmt.Errorf("delta must be positive, have: %d", delta) + assert.EqualError(t, err, exp.Error(), "size increase must be positive") + }) + + t.Run("zero increase", func(t *testing.T) { + numberOfNodes := 3 + delta := 0 + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: numberOfNodes, + }) + + exp := fmt.Errorf("delta must be positive, have: %d", delta) + + err := ng.IncreaseSize(delta) + assert.EqualError(t, err, exp.Error(), "size increase must be positive") + }) + + t.Run("large increase above maximum", func(t *testing.T) { + nodes := 15 + delta := 10 + + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: nodes, + }) + + exp := fmt.Errorf("size increase is too large. 
current: %d desired: %d max: %d", + nodes, nodes+delta, ng.MaxSize()) + + err := ng.IncreaseSize(delta) + assert.EqualError(t, err, exp.Error(), "size increase is too large") + }) +} + +func TestNodeGroup_DecreaseTargetSize(t *testing.T) { + t.Run("basic decrease", func(t *testing.T) { + client := &vultrClientMock{} + + nodeQuant := 3 + delta := -1 + ng := testData(client, &govultr.NodePool{NodeQuantity: nodeQuant, MinNodes: 2, MaxNodes: 3}) + + newQaunt := nodeQuant + delta + client.On("UpdateNodePool", context.Background(), ng.clusterID, ng.id, + &govultr.NodePoolReqUpdate{NodeQuantity: newQaunt}).Return(&govultr.NodePool{NodeQuantity: newQaunt}, nil).Once() + + err := ng.DecreaseTargetSize(delta) + assert.NoError(t, err) + }) + + t.Run("positive decrease", func(t *testing.T) { + nodes := 5 + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: nodes, + }) + + delta := 1 + err := ng.DecreaseTargetSize(delta) + + exp := fmt.Errorf("delta must be negative, have: %d", delta) + assert.EqualError(t, err, exp.Error(), "size decrease must be negative") + }) + + t.Run("small decrease below minimum", func(t *testing.T) { + delta := -2 + numberOfNodes := 3 + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + NodeQuantity: numberOfNodes, + MinNodes: 2, + MaxNodes: 5, + }) + + exp := fmt.Errorf("size decrease is too small. current: %d desired: %d min: %d", + numberOfNodes, numberOfNodes+delta, ng.MinSize()) + err := ng.DecreaseTargetSize(delta) + assert.EqualError(t, err, exp.Error(), "size decrease is too small") + }) +} + +func TestNodeGroup_Nodes(t *testing.T) { + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{ + Nodes: []govultr.Node{ + { + ID: "a-1", + }, + { + ID: "a-2", + }, + }, + }) + + instances := []cloudprovider.Instance{ + { + Id: "vultr://a-1", + }, + { + Id: "vultr://a-2", + }, + } + + nodes, err := ng.Nodes() + assert.NoError(t, err) + assert.Equal(t, instances, nodes, "nodes do not match") +} + +func TestNodeGroup_DeleteNodes(t *testing.T) { + + t.Run("deleting node", func(t *testing.T) { + + ctx := context.Background() + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{NodeQuantity: 2, MinNodes: 2, MaxNodes: 3, Nodes: []govultr.Node{{ID: "a"}}}) + + nodes := []*apiv1.Node{ + {ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{nodeIDLabel: "a"}}}, + } + + client.On("DeleteNodePoolInstance", ctx, ng.clusterID, ng.id, "a").Return(nil).Once() + + err := ng.DeleteNodes(nodes) + assert.NoError(t, err) + }) + + t.Run("delete failure", func(t *testing.T) { + ctx := context.Background() + client := &vultrClientMock{} + ng := testData(client, &govultr.NodePool{NodeQuantity: 2, MinNodes: 2, MaxNodes: 3, Nodes: []govultr.Node{{ID: "a"}}}) + + nodes := []*apiv1.Node{ + {ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{nodeIDLabel: "a"}}}, + } + + client.On("DeleteNodePoolInstance", ctx, ng.clusterID, ng.id, "a").Return(errors.New("error")).Once() + + err := ng.DeleteNodes(nodes) + assert.Error(t, err) + }) +} + +func TestNodeGroup_Exist(t *testing.T) { + client := &vultrClientMock{} + nodeGroup := testData(client, &govultr.NodePool{MinNodes: 1, MaxNodes: 2}) + + assert.Equal(t, true, nodeGroup.Exist(), "nodegroup should exist") + +} + +func testData(client vultrClient, np *govultr.NodePool) *NodeGroup { + + return &NodeGroup{ + id: "a", + clusterID: "a", + client: client, + nodePool: np, + minSize: np.MinNodes, + maxSize: np.MaxNodes, + } +}
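A minimal sketch, not part of the patch itself, of exercising the bundled `govultr` client against a stub API server. It assumes only the standard library plus the types and methods defined above (`NewClient`, `SetBaseUrl`, `ListNodePools`); the JSON payload mirrors the `node_pools`/`meta` shape the client unmarshals.

```go
package main

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/vultr/govultr"
)

func main() {
	// Stub of the Vultr API: always returns one auto-scaled node pool.
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprint(w, `{"node_pools":[{"id":"np-1","label":"pool-1","node_quantity":2,"auto_scaler":true,"min_nodes":1,"max_nodes":3}],"meta":{"total":1}}`)
	}))
	defer ts.Close()

	// Point the client at the stub instead of the default https://api.vultr.com/v2.
	client, err := govultr.NewClient(http.DefaultClient).SetBaseUrl(ts.URL)
	if err != nil {
		panic(err)
	}

	// nil ListOptions means no pagination parameters, the same way the manager calls it.
	pools, meta, err := client.ListNodePools(context.Background(), "vke-cluster-id", nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(meta.Total, pools[0].ID, pools[0].MaxNodes) // 1 np-1 3
}
```

In the real deployment the client is constructed by `newManager`, which wraps an oauth2 static-token HTTP client around the same `govultr.Client` instead of `http.DefaultClient`.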