Skip to content

Commit

Permalink
feat: each node pool can now have different init configs
Browse files Browse the repository at this point in the history
  • Loading branch information
Silvest89 committed Oct 12, 2023
1 parent e7bf3ec commit aa4ea0e
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 19 deletions.
32 changes: 32 additions & 0 deletions cluster-autoscaler/cloudprovider/hetzner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,38 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.

`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.

`HCLOUD_CLUSTER_CONFIG` This is the new configuration format. When it is set, it replaces:
* `HCLOUD_CLOUD_INIT`
* `HCLOUD_IMAGE`

The value must be base64-encoded JSON with the following structure:

```json
{
"imagesForArch": { // Each value uses the same format as HCLOUD_IMAGE
"arm64": "",
"amd64": ""
},
"nodeConfigs": {
"pool1": { // The key is the node pool name. An entry is required for every pool that you have
"cloudInit": "", // Same content as HCLOUD_CLOUD_INIT, but in plain text: do not base64-encode it a second time
"labels": {
"node.kubernetes.io/role": "autoscaler-node"
},
"taints":
[
{
"key": "node.kubernetes.io/role",
"value": "autoscaler-node",
"effect": "NoExecute"
}
]
}
}
}
```


`HCLOUD_NETWORK` Defaults to empty. The name of the network that is used in the cluster, @see https://docs.hetzner.cloud/#networks

`HCLOUD_FIREWALL` Defaults to empty. The name of the firewall that is used in the cluster, @see https://docs.hetzner.cloud/#firewalls
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,12 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
klog.Fatalf("Failed to create Hetzner cloud provider: %v", err)
}

if manager.clusterConfig.IsUsingNewFormat && len(manager.clusterConfig.NodeConfigs) == 0 {
klog.Fatalf("No cluster config present provider: %v", err)
}

validNodePoolName := regexp.MustCompile(`^[a-z0-9A-Z]+[a-z0-9A-Z\-\.\_]*[a-z0-9A-Z]+$|^[a-z0-9A-Z]{1}$`)
clusterUpdateLock := sync.Mutex{}

for _, nodegroupSpec := range do.NodeGroupSpecs {
spec, err := createNodePoolSpec(nodegroupSpec)
if err != nil {
Expand All @@ -206,6 +209,13 @@ func BuildHetzner(_ config.AutoscalingOptions, do cloudprovider.NodeGroupDiscove
klog.Fatalf("Failed to get servers for for node pool %s error: %v", nodegroupSpec, err)
}

if manager.clusterConfig.IsUsingNewFormat {
_, ok := manager.clusterConfig.NodeConfigs[spec.name]
if !ok {
klog.Fatalf("No node config present for node id `%s` error: %v", spec.name, err)
}
}

manager.nodeGroups[spec.name] = &hetznerNodeGroup{
manager: manager,
id: spec.name,
Expand Down
68 changes: 56 additions & 12 deletions cluster-autoscaler/cloudprovider/hetzner/hetzner_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package hetzner
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/http"
Expand All @@ -45,8 +46,7 @@ type hetznerManager struct {
client *hcloud.Client
nodeGroups map[string]*hetznerNodeGroup
apiCallContext context.Context
cloudInit string
image string
clusterConfig *ClusterConfig
sshKey *hcloud.SSHKey
network *hcloud.Network
firewall *hcloud.Firewall
Expand All @@ -57,6 +57,29 @@ type hetznerManager struct {
cachedServers *serversCache
}

// ClusterConfig is the decoded form of the `HCLOUD_CLUSTER_CONFIG`
// environment variable, together with the settings kept for the legacy
// `HCLOUD_CLOUD_INIT` / `HCLOUD_IMAGE` variables.
type ClusterConfig struct {
	// ImagesForArch names the image to use for each server architecture.
	ImagesForArch ImageList `json:"imagesForArch"`

	// NodeConfigs holds the per-pool settings, keyed by node pool name.
	NodeConfigs map[string]*NodeConfig `json:"nodeConfigs"`

	// IsUsingNewFormat reports whether `HCLOUD_CLUSTER_CONFIG` was supplied.
	// It is derived from the environment, so it is excluded from JSON to keep
	// the decoded payload from overriding it.
	IsUsingNewFormat bool `json:"-"`

	// LegacyConfig carries the values of the legacy environment variables.
	// It is filled in by the manager, never from the JSON payload.
	LegacyConfig LegacyConfig `json:"-"`
}

// ImageList names the image to use for each supported server architecture.
// Each value is an image name, an image ID, or a label selector.
type ImageList struct {
	// Arm64 is the image used for ARM server types.
	Arm64 string `json:"arm64"`

	// Amd64 is the image used for x86 server types.
	Amd64 string `json:"amd64"`
}

// NodeConfig holds the settings that apply to a single node pool.
type NodeConfig struct {
	// CloudInit is the user data passed to servers created for the pool.
	// It is used as-is, without any further base64 decoding.
	CloudInit string `json:"cloudInit"`

	// Taints are appended to the taints of the pool's template node.
	Taints []apiv1.Taint `json:"taints"`

	// Labels are merged into the pool's node group labels.
	Labels map[string]string `json:"labels"`
}

// LegacyConfig keeps the settings read from the legacy environment
// variables, which are used when `HCLOUD_CLUSTER_CONFIG` is not set.
type LegacyConfig struct {
	// CloudInit is the decoded content of `HCLOUD_CLOUD_INIT`.
	// ImageName is the value of `HCLOUD_IMAGE`, or its default when unset.
	CloudInit, ImageName string
}

func newManager() (*hetznerManager, error) {
token := os.Getenv("HCLOUD_TOKEN")
if token == "" {
Expand All @@ -71,19 +94,41 @@ func newManager() (*hetznerManager, error) {
)

ctx := context.Background()
var err error

clusterConfigBase64 := os.Getenv("HCLOUD_CLUSTER_CONFIG")
cloudInitBase64 := os.Getenv("HCLOUD_CLOUD_INIT")
if cloudInitBase64 == "" {
return nil, errors.New("`HCLOUD_CLOUD_INIT` is not specified")

if clusterConfigBase64 == "" && cloudInitBase64 == "" {
return nil, errors.New("`HCLOUD_CLUSTER_CONFIG` or `HCLOUD_CLOUD_INIT` is not specified")
}
cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64)
if err != nil {
return nil, fmt.Errorf("failed to parse cloud init error: %s", err)
var clusterConfig *ClusterConfig = &ClusterConfig{}

if clusterConfigBase64 != "" {
clusterConfig.IsUsingNewFormat = true
}

if clusterConfig.IsUsingNewFormat {
clusterConfigEnv, err := base64.StdEncoding.DecodeString(clusterConfigBase64)
if err != nil {
return nil, fmt.Errorf("failed to parse cluster config error: %s", err)
}
json.Unmarshal([]byte(string(clusterConfigEnv)), &clusterConfig)
}

imageName := os.Getenv("HCLOUD_IMAGE")
if imageName == "" {
imageName = "ubuntu-20.04"
if !clusterConfig.IsUsingNewFormat {
cloudInit, err := base64.StdEncoding.DecodeString(cloudInitBase64)
if err != nil {
return nil, fmt.Errorf("failed to parse cloud init error: %s", err)
}

imageName := os.Getenv("HCLOUD_IMAGE")
if imageName == "" {
imageName = "ubuntu-20.04"
}

clusterConfig.LegacyConfig.CloudInit = string(cloudInit)
clusterConfig.LegacyConfig.ImageName = imageName
}

publicIPv4 := true
Expand Down Expand Up @@ -141,15 +186,14 @@ func newManager() (*hetznerManager, error) {
m := &hetznerManager{
client: client,
nodeGroups: make(map[string]*hetznerNodeGroup),
cloudInit: string(cloudInit),
image: imageName,
sshKey: sshKey,
network: network,
firewall: firewall,
createTimeout: createTimeout,
apiCallContext: ctx,
publicIPv4: publicIPv4,
publicIPv6: publicIPv6,
clusterConfig: clusterConfig,
cachedServerType: newServerTypeCache(ctx, client),
cachedServers: newServersCache(ctx, client),
}
Expand Down
49 changes: 43 additions & 6 deletions cluster-autoscaler/cloudprovider/hetzner/hetzner_node_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package hetzner
import (
"context"
"fmt"
"maps"
"math/rand"
"strings"
"sync"
Expand Down Expand Up @@ -241,6 +242,16 @@ func (n *hetznerNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, err
}
node.Labels = cloudprovider.JoinStringMaps(node.Labels, nodeGroupLabels)

if n.manager.clusterConfig.IsUsingNewFormat && n.id != drainingNodePoolId {
for _, taint := range n.manager.clusterConfig.NodeConfigs[n.id].Taints {
node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{
Key: taint.Key,
Value: taint.Value,
Effect: taint.Effect,
})
}
}

nodeInfo := schedulerframework.NewNodeInfo(cloudprovider.BuildKubeProxy(n.id))
nodeInfo.SetNode(&node)

Expand Down Expand Up @@ -325,14 +336,23 @@ func buildNodeGroupLabels(n *hetznerNodeGroup) (map[string]string, error) {
if err != nil {
return nil, err
}
klog.V(4).Infof("Build node group label for %s", n.id)

return map[string]string{
labels := map[string]string{
apiv1.LabelInstanceType: n.instanceType,
apiv1.LabelTopologyRegion: n.region,
apiv1.LabelArchStable: archLabel,
"csi.hetzner.cloud/location": n.region,
nodeGroupLabel: n.id,
}, nil
}

if n.manager.clusterConfig.IsUsingNewFormat && n.id != drainingNodePoolId {
maps.Copy(labels, n.manager.clusterConfig.NodeConfigs[n.id].Labels)
}

klog.V(4).Infof("%s nodegroup labels: %s", n.id, labels)

return labels, nil
}

func getMachineTypeResourceList(m *hetznerManager, instanceType string) (apiv1.ResourceList, error) {
Expand Down Expand Up @@ -392,10 +412,16 @@ func createServer(n *hetznerNodeGroup) error {
return err
}

cloudInit := n.manager.clusterConfig.LegacyConfig.CloudInit

if n.manager.clusterConfig.IsUsingNewFormat {
cloudInit = n.manager.clusterConfig.NodeConfigs[n.id].CloudInit
}

StartAfterCreate := true
opts := hcloud.ServerCreateOpts{
Name: newNodeName(n),
UserData: n.manager.cloudInit,
UserData: string(cloudInit),
Location: &hcloud.Location{Name: n.region},
ServerType: serverType,
Image: image,
Expand Down Expand Up @@ -443,7 +469,18 @@ func createServer(n *hetznerNodeGroup) error {
// server.
func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Image, error) {
// Select correct image based on server type architecture
image, _, err := n.manager.client.Image.GetForArchitecture(context.TODO(), n.manager.image, serverType.Architecture)
imageName := n.manager.clusterConfig.LegacyConfig.ImageName
if n.manager.clusterConfig.IsUsingNewFormat {
if serverType.Architecture == hcloud.ArchitectureARM {
imageName = n.manager.clusterConfig.ImagesForArch.Arm64
}

if serverType.Architecture == hcloud.ArchitectureX86 {
imageName = n.manager.clusterConfig.ImagesForArch.Amd64
}
}

image, _, err := n.manager.client.Image.GetForArchitecture(context.TODO(), imageName, serverType.Architecture)
if err != nil {
// Keep looking for label if image was not found by id or name
if !strings.HasPrefix(err.Error(), "image not found") {
Expand All @@ -462,12 +499,12 @@ func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Imag
Sort: []string{"created:desc"},
Architecture: []hcloud.Architecture{serverType.Architecture},
ListOpts: hcloud.ListOpts{
LabelSelector: n.manager.image,
LabelSelector: imageName,
},
})

if err != nil || len(images) == 0 {
return nil, fmt.Errorf("unable to find image %s with architecture %s: %v", n.manager.image, serverType.Architecture, err)
return nil, fmt.Errorf("unable to find image %s with architecture %s: %v", imageName, serverType.Architecture, err)
}

return images[0], nil
Expand Down

0 comments on commit aa4ea0e

Please sign in to comment.