From 6df91dad4336c4cb74b327e28bd36114a2a00dbe Mon Sep 17 00:00:00 2001 From: Megamind <882485+jeff-mccoy@users.noreply.github.com> Date: Fri, 3 Feb 2023 13:53:08 -0600 Subject: [PATCH] Smarter Zarf Registry HPA behavior (#1317) Make Registry HPA less aggressive and block from downscaling while running `zarf package deploy` if images exist. ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow) followed --- .../zarf-registry/chart/templates/hpa.yaml | 27 ++++- packages/zarf-registry/registry-values.yaml | 6 + packages/zarf-registry/zarf.yaml | 12 ++ src/internal/cluster/injector.go | 21 +++- src/internal/cluster/seed.go | 107 ------------------ src/internal/cluster/state.go | 74 ++++++++++++ src/internal/cluster/zarf.go | 39 +++++++ src/pkg/k8s/hpa.go | 36 ++++++ src/pkg/packager/deploy.go | 27 ++++- 9 files changed, 234 insertions(+), 115 deletions(-) delete mode 100644 src/internal/cluster/seed.go create mode 100644 src/pkg/k8s/hpa.go diff --git a/packages/zarf-registry/chart/templates/hpa.yaml b/packages/zarf-registry/chart/templates/hpa.yaml index a4c1ef68d0..671b5e68d9 100644 --- a/packages/zarf-registry/chart/templates/hpa.yaml +++ b/packages/zarf-registry/chart/templates/hpa.yaml @@ -1,5 +1,5 @@ {{- if .Values.autoscaling.enabled }} -apiVersion: autoscaling/v1 +apiVersion: autoscaling/v2 kind: HorizontalPodAutoscaler metadata: name: {{ template "docker-registry.fullname" . }} @@ -15,5 +15,28 @@ spec: name: {{ template "docker-registry.fullname" . }} minReplicas: {{ .Values.autoscaling.minReplicas }} maxReplicas: {{ .Values.autoscaling.maxReplicas }} - targetCPUUtilizationPercentage: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + behavior: + scaleDown: + # Use 60 second stabilization window becuase zarf will freeze scale down during deploys + stabilizationWindowSeconds: 60 + # Scale down one pod per minute + policies: + - type: Pods + value: 1 + periodSeconds: 60 + scaleUp: + # Delay initial checks by 30 seconds + stabilizationWindowSeconds: 30 + # Scale up one pod per minute + policies: + - type: Pods + value: 1 + periodSeconds: 60 {{- end }} diff --git a/packages/zarf-registry/registry-values.yaml b/packages/zarf-registry/registry-values.yaml index 01eef4f026..050debb97a 100644 --- a/packages/zarf-registry/registry-values.yaml +++ b/packages/zarf-registry/registry-values.yaml @@ -30,3 +30,9 @@ fullnameOverride: "zarf-docker-registry" podLabels: zarf.dev/agent: "ignore" + +autoscaling: + enabled: ###ZARF_VAR_REGISTRY_HPA_ENABLE### + minReplicas: "###ZARF_VAR_REGISTRY_HPA_MIN###" + maxReplicas: "###ZARF_VAR_REGISTRY_HPA_MAX###" + targetCPUUtilizationPercentage: 80 diff --git a/packages/zarf-registry/zarf.yaml b/packages/zarf-registry/zarf.yaml index 618ac116cc..609dd3c06f 100644 --- a/packages/zarf-registry/zarf.yaml +++ b/packages/zarf-registry/zarf.yaml @@ -27,6 +27,18 @@ variables: description: "The memory limit for the registry" default: "2Gi" + - name: REGISTRY_HPA_MIN + description: "The minimum number of registry replicas" + default: "1" + + - name: REGISTRY_HPA_MAX + description: "The maximum number of registry replicas" + default: "5" + + - name: REGISTRY_HPA_ENABLE + description: "Enable the Horizontal Pod Autoscaler for the registry" + default: "true" + components: - name: zarf-seed-registry description: | diff --git a/src/internal/cluster/injector.go b/src/internal/cluster/injector.go index fc120172bc..b0aca683d7 100644 --- a/src/internal/cluster/injector.go +++ b/src/internal/cluster/injector.go @@ -26,8 +26,8 @@ import ( // The chunk size for the tarball chunks. var payloadChunkSize = 1024 * 768 -// RunInjectionMadness initializes a Zarf injection into the cluster. -func (c *Cluster) RunInjectionMadness(tempPath types.TempPaths) { +// StartInjectionMadness initializes a Zarf injection into the cluster. +func (c *Cluster) StartInjectionMadness(tempPath types.TempPaths) { message.Debugf("packager.runInjectionMadness(%#v)", tempPath) spinner := message.NewProgressSpinner("Attempting to bootstrap the seed image into the cluster") @@ -104,6 +104,23 @@ func (c *Cluster) RunInjectionMadness(tempPath types.TempPaths) { spinner.Fatalf(nil, "Unable to perform the injection") } +// StopInjectionMadness handles cleanup once the seed registry is up. +func (c *Cluster) StopInjectionMadness() error { + // Try to kill the injector pod now + if err := c.Kube.DeletePod(ZarfNamespace, "injector"); err != nil { + return err + } + + // Remove the configmaps + labelMatch := map[string]string{"zarf-injector": "payload"} + if err := c.Kube.DeleteConfigMapsByLabel(ZarfNamespace, labelMatch); err != nil { + return err + } + + // Remove the injector service + return c.Kube.DeleteService(ZarfNamespace, "zarf-injector") +} + func (c *Cluster) createPayloadConfigmaps(tempPath types.TempPaths, spinner *message.Spinner) ([]string, string, error) { message.Debugf("packager.tryInjectorPayloadDeploy(%#v)", tempPath) var configMaps []string diff --git a/src/internal/cluster/seed.go b/src/internal/cluster/seed.go deleted file mode 100644 index 4522c2c0f2..0000000000 --- a/src/internal/cluster/seed.go +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: 2021-Present The Zarf Authors - -// Package cluster contains Zarf-specific cluster management functions. -package cluster - -import ( - "fmt" - - "github.com/defenseunicorns/zarf/src/config" - "github.com/defenseunicorns/zarf/src/pkg/message" - "github.com/defenseunicorns/zarf/src/pkg/utils" - "github.com/defenseunicorns/zarf/src/types" -) - -// PostSeedRegistry handles cleanup once the seed registry is up. -func (c *Cluster) PostSeedRegistry(tempPath types.TempPaths) error { - message.Debugf("cluster.PostSeedRegistry(%#v)", tempPath) - - // Try to kill the injector pod now - if err := c.Kube.DeletePod(ZarfNamespace, "injector"); err != nil { - return err - } - - // Remove the configmaps - labelMatch := map[string]string{"zarf-injector": "payload"} - if err := c.Kube.DeleteConfigMapsByLabel(ZarfNamespace, labelMatch); err != nil { - return err - } - - // Remove the injector service - return c.Kube.DeleteService(ZarfNamespace, "zarf-injector") -} - -func (c *Cluster) fillInEmptyContainerRegistryValues(containerRegistry types.RegistryInfo) types.RegistryInfo { - // Set default NodePort if none was provided - if containerRegistry.NodePort == 0 { - containerRegistry.NodePort = config.ZarfInClusterContainerRegistryNodePort - } - - // Set default url if an external registry was not provided - if containerRegistry.Address == "" { - containerRegistry.InternalRegistry = true - containerRegistry.Address = fmt.Sprintf("%s:%d", config.IPV4Localhost, containerRegistry.NodePort) - } - - // Generate a push-user password if not provided by init flag - if containerRegistry.PushPassword == "" { - containerRegistry.PushPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) - } - - // Set pull-username if not provided by init flag - if containerRegistry.PullUsername == "" { - if containerRegistry.InternalRegistry { - containerRegistry.PullUsername = config.ZarfRegistryPullUser - } else { - // If this is an external registry and a pull-user wasn't provided, use the same credentials as the push user - containerRegistry.PullUsername = containerRegistry.PushUsername - } - } - if containerRegistry.PullPassword == "" { - if containerRegistry.InternalRegistry { - containerRegistry.PullPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) - } else { - // If this is an external registry and a pull-user wasn't provided, use the same credentials as the push user - containerRegistry.PullPassword = containerRegistry.PushPassword - } - } - - if containerRegistry.Secret == "" { - containerRegistry.Secret = utils.RandomString(config.ZarfGeneratedSecretLen) - } - - return containerRegistry -} - -// Fill in empty GitServerInfo values with the defaults. -func (c *Cluster) fillInEmptyGitServerValues(gitServer types.GitServerInfo) types.GitServerInfo { - // Set default svc url if an external repository was not provided - if gitServer.Address == "" { - gitServer.Address = config.ZarfInClusterGitServiceURL - gitServer.InternalServer = true - } - - // Generate a push-user password if not provided by init flag - if gitServer.PushPassword == "" { - gitServer.PushPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) - } - - // Set read-user information if using an internal repository, otherwise copy from the push-user - if gitServer.PullUsername == "" { - if gitServer.InternalServer { - gitServer.PullUsername = config.ZarfGitReadUser - } else { - gitServer.PullUsername = gitServer.PushUsername - } - } - if gitServer.PullPassword == "" { - if gitServer.InternalServer { - gitServer.PullPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) - } else { - gitServer.PullPassword = gitServer.PushPassword - } - } - - return gitServer -} diff --git a/src/internal/cluster/state.go b/src/internal/cluster/state.go index 67b631c047..94e078e1cc 100644 --- a/src/internal/cluster/state.go +++ b/src/internal/cluster/state.go @@ -202,3 +202,77 @@ func (c *Cluster) SaveZarfState(state types.ZarfState) error { return nil } + +func (c *Cluster) fillInEmptyContainerRegistryValues(containerRegistry types.RegistryInfo) types.RegistryInfo { + // Set default NodePort if none was provided + if containerRegistry.NodePort == 0 { + containerRegistry.NodePort = config.ZarfInClusterContainerRegistryNodePort + } + + // Set default url if an external registry was not provided + if containerRegistry.Address == "" { + containerRegistry.InternalRegistry = true + containerRegistry.Address = fmt.Sprintf("%s:%d", config.IPV4Localhost, containerRegistry.NodePort) + } + + // Generate a push-user password if not provided by init flag + if containerRegistry.PushPassword == "" { + containerRegistry.PushPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) + } + + // Set pull-username if not provided by init flag + if containerRegistry.PullUsername == "" { + if containerRegistry.InternalRegistry { + containerRegistry.PullUsername = config.ZarfRegistryPullUser + } else { + // If this is an external registry and a pull-user wasn't provided, use the same credentials as the push user + containerRegistry.PullUsername = containerRegistry.PushUsername + } + } + if containerRegistry.PullPassword == "" { + if containerRegistry.InternalRegistry { + containerRegistry.PullPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) + } else { + // If this is an external registry and a pull-user wasn't provided, use the same credentials as the push user + containerRegistry.PullPassword = containerRegistry.PushPassword + } + } + + if containerRegistry.Secret == "" { + containerRegistry.Secret = utils.RandomString(config.ZarfGeneratedSecretLen) + } + + return containerRegistry +} + +// Fill in empty GitServerInfo values with the defaults. +func (c *Cluster) fillInEmptyGitServerValues(gitServer types.GitServerInfo) types.GitServerInfo { + // Set default svc url if an external repository was not provided + if gitServer.Address == "" { + gitServer.Address = config.ZarfInClusterGitServiceURL + gitServer.InternalServer = true + } + + // Generate a push-user password if not provided by init flag + if gitServer.PushPassword == "" { + gitServer.PushPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) + } + + // Set read-user information if using an internal repository, otherwise copy from the push-user + if gitServer.PullUsername == "" { + if gitServer.InternalServer { + gitServer.PullUsername = config.ZarfGitReadUser + } else { + gitServer.PullUsername = gitServer.PushUsername + } + } + if gitServer.PullPassword == "" { + if gitServer.InternalServer { + gitServer.PullPassword = utils.RandomString(config.ZarfGeneratedPasswordLen) + } else { + gitServer.PullPassword = gitServer.PushPassword + } + } + + return gitServer +} diff --git a/src/internal/cluster/zarf.go b/src/internal/cluster/zarf.go index 63797a6e91..28d509f850 100644 --- a/src/internal/cluster/zarf.go +++ b/src/internal/cluster/zarf.go @@ -11,6 +11,7 @@ import ( "github.com/defenseunicorns/zarf/src/config" "github.com/defenseunicorns/zarf/src/pkg/message" "github.com/defenseunicorns/zarf/src/types" + autoscalingV2 "k8s.io/api/autoscaling/v2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -98,3 +99,41 @@ func (c *Cluster) RecordPackageDeployment(pkg types.ZarfPackage, components []ty c.Kube.CreateOrUpdateSecret(deployedPackageSecret) } + +// EnableRegHPAScaleDown enables the HPA scale down for the Zarf Registry. +func (c *Cluster) EnableRegHPAScaleDown() error { + hpa, err := c.Kube.GetHPA(ZarfNamespace, "zarf-docker-registry") + if err != nil { + return err + } + + // Enable HPA scale down. + policy := autoscalingV2.MinChangePolicySelect + hpa.Spec.Behavior.ScaleDown.SelectPolicy = &policy + + // Save the HPA changes. + if _, err = c.Kube.UpdateHPA(hpa); err != nil { + return err + } + + return nil +} + +// DisableRegHPAScaleDown disables the HPA scale down for the Zarf Registry. +func (c *Cluster) DisableRegHPAScaleDown() error { + hpa, err := c.Kube.GetHPA(ZarfNamespace, "zarf-docker-registry") + if err != nil { + return err + } + + // Disable HPA scale down. + policy := autoscalingV2.DisabledPolicySelect + hpa.Spec.Behavior.ScaleDown.SelectPolicy = &policy + + // Save the HPA changes. + if _, err = c.Kube.UpdateHPA(hpa); err != nil { + return err + } + + return nil +} diff --git a/src/pkg/k8s/hpa.go b/src/pkg/k8s/hpa.go new file mode 100644 index 0000000000..a159823f67 --- /dev/null +++ b/src/pkg/k8s/hpa.go @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 2021-Present The Zarf Authors + +// Package k8s provides a client for interacting with a Kubernetes cluster. +package k8s + +import ( + "context" + + autoscalingV2 "k8s.io/api/autoscaling/v2" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// GetAllHPAs returns a list of horizontal pod autoscalers for all namespaces. +func (k *K8s) GetAllHPAs() (*autoscalingV2.HorizontalPodAutoscalerList, error) { + return k.GetHPAs(corev1.NamespaceAll) +} + +// GetHPAs returns a list of horizontal pod autoscalers in a given namespace. +func (k *K8s) GetHPAs(namespace string) (*autoscalingV2.HorizontalPodAutoscalerList, error) { + metaOptions := metav1.ListOptions{} + return k.Clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).List(context.TODO(), metaOptions) +} + +// GetHPA returns a single horizontal pod autoscaler by namespace and name. +func (k *K8s) GetHPA(namespace, name string) (*autoscalingV2.HorizontalPodAutoscaler, error) { + metaOptions := metav1.GetOptions{} + return k.Clientset.AutoscalingV2().HorizontalPodAutoscalers(namespace).Get(context.TODO(), name, metaOptions) +} + +// UpdateHPA updates the given horizontal pod autoscaler in the cluster. +func (k *K8s) UpdateHPA(hpa *autoscalingV2.HorizontalPodAutoscaler) (*autoscalingV2.HorizontalPodAutoscaler, error) { + metaOptions := metav1.UpdateOptions{} + return k.Clientset.AutoscalingV2().HorizontalPodAutoscalers(hpa.Namespace).Update(context.TODO(), hpa, metaOptions) +} diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go index 8f6130f5cf..8e6859a9e9 100644 --- a/src/pkg/packager/deploy.go +++ b/src/pkg/packager/deploy.go @@ -28,8 +28,11 @@ import ( corev1 "k8s.io/api/core/v1" ) -var valueTemplate template.Values -var connectStrings = make(types.ConnectStrings) +var ( + hpaModified bool + valueTemplate template.Values + connectStrings = make(types.ConnectStrings) +) // Deploy attempts to deploy the given PackageConfig. func (p *Packager) Deploy() error { @@ -54,6 +57,13 @@ func (p *Packager) Deploy() error { return fmt.Errorf("unable to set the active variables: %w", err) } + // Reset registry HPA scale down whether an error occurs or not + defer func() { + if p.cluster != nil && hpaModified { + p.cluster.EnableRegHPAScaleDown() + } + }() + // Get a list of all the components we are deploying and actually deploy them deployedComponents, err := p.deployComponents() if err != nil { @@ -144,7 +154,7 @@ func (p *Packager) deployInitComponent(component types.ZarfComponent) (charts [] // Before deploying the seed registry, start the injector if isSeedRegistry { - p.cluster.RunInjectionMadness(p.tmp) + p.cluster.StartInjectionMadness(p.tmp) } charts, err = p.deployComponent(component, isAgent /* skip img checksum if isAgent */) @@ -154,7 +164,7 @@ func (p *Packager) deployInitComponent(component types.ZarfComponent) (charts [] // Do cleanup for when we inject the seed registry during initialization if isSeedRegistry { - err := p.cluster.PostSeedRegistry(p.tmp) + err := p.cluster.StopInjectionMadness() if err != nil { return charts, fmt.Errorf("unable to seed the Zarf Registry: %w", err) } @@ -216,6 +226,15 @@ func (p *Packager) deployComponent(component types.ZarfComponent, noImgChecksum } } + // Disable the registry HPA scale down if we are deploying images and it is not already disabled + if hasImages && !hpaModified && p.cfg.State.RegistryInfo.InternalRegistry { + if err := p.cluster.DisableRegHPAScaleDown(); err != nil { + message.Debugf("unable to toggle the registry HPA scale down: %s", err.Error()) + } else { + hpaModified = true + } + } + valueTemplate, err = p.getUpdatedValueTemplate(component) if err != nil { return charts, fmt.Errorf("unable to get the updated value template: %w", err)