From 638b79768293fb806a0a9507a97c635159326c8d Mon Sep 17 00:00:00 2001 From: Johannes Malsam Date: Tue, 3 Dec 2024 15:19:03 +0100 Subject: [PATCH] backoff addon upgrade on a per install basis --- internal/dinosaur/pkg/services/addon.go | 31 ++++++++++++++------ internal/dinosaur/pkg/services/addon_test.go | 10 +++++-- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/internal/dinosaur/pkg/services/addon.go b/internal/dinosaur/pkg/services/addon.go index 616773345..dc9e0e647 100644 --- a/internal/dinosaur/pkg/services/addon.go +++ b/internal/dinosaur/pkg/services/addon.go @@ -34,8 +34,10 @@ type AddonProvisioner struct { ocmClient ocm.Client customizations []addonCustomization updateAddonStatusMetricFunc updateAddonStatusMetricFunc - lastStatus metrics.AddonStatus - lastUpgradeRequestTime time.Time + // lastStatusPerInstall maps an install key, built by installID as "clusterID:addonID", to the status + // last written for that install by updateAddonStatus. NOTE(review): lastUpgradeRequestTime is still one timestamp shared by all installs + lastStatusPerInstall map[string]metrics.AddonStatus + lastUpgradeRequestTime time.Time } // NewAddonProvisioner creates a new instance of AddonProvisioner @@ -55,6 +57,7 @@ func NewAddonProvisioner(addonConfig *ocmImpl.AddonConfig, baseConfig *ocmImpl.O ocmClient: ocmImpl.NewClient(conn), customizations: initCustomizations(*addonConfig), updateAddonStatusMetricFunc: metrics.UpdateClusterAddonStatusMetric, + lastStatusPerInstall: map[string]metrics.AddonStatus{}, }, nil } @@ -96,7 +99,7 @@ func (p *AddonProvisioner) Provision(cluster api.Cluster, dataplaneClusterConfig for _, installedAddon := range installedAddons { // addon is installed on the cluster but not present in gitops config - uninstall it errs = append(errs, p.uninstallAddon(cluster.ClusterID, installedAddon.ID)) - p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, metrics.AddonHealthy) + p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, cluster.ClusterID, 
metrics.AddonHealthy) } return errors.Join(errs...) @@ -113,7 +116,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan if provisionError != nil { status = metrics.AddonUnhealthy } - p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, status) + p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, clusterID, status) }() if addonErr != nil { @@ -233,7 +236,7 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters } func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error { - if p.backoffUpgradeRequest() { + if p.backoffUpgradeRequest(config.ID, clusterID) { glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID) return nil } @@ -250,8 +253,9 @@ func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConf return nil } -func (p *AddonProvisioner) backoffUpgradeRequest() bool { - return p.lastStatus != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff +func (p *AddonProvisioner) backoffUpgradeRequest(addonID string, clusterID string) bool { + id := installID(addonID, clusterID) + return p.lastStatusPerInstall[id] != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff } func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error { @@ -262,11 +266,20 @@ func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) erro return nil } -func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, status metrics.AddonStatus) { +func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, clusterID string, status metrics.AddonStatus) { if p.updateAddonStatusMetricFunc != nil { p.updateAddonStatusMetricFunc(addonID, clusterName, status) } - p.lastStatus = status + + if p.lastStatusPerInstall == nil { + p.lastStatusPerInstall = map[string]metrics.AddonStatus{} + } + + 
p.lastStatusPerInstall[installID(addonID, clusterID)] = status +} + +func installID(addonID string, clusterID string) string { + return fmt.Sprintf("%s:%s", clusterID, addonID) } func isFinalState(state clustersmgmtv1.AddOnInstallationState) bool { diff --git a/internal/dinosaur/pkg/services/addon_test.go b/internal/dinosaur/pkg/services/addon_test.go index a3782829b..940f1fd12 100644 --- a/internal/dinosaur/pkg/services/addon_test.go +++ b/internal/dinosaur/pkg/services/addon_test.go @@ -666,6 +666,7 @@ func TestAddonProvisioner_Provision(t *testing.T) { if tt.fields.ocmClient != nil { if len(tt.fields.ocmClient.UpdateAddonInstallationCalls()) > 0 { Expect(p.lastUpgradeRequestTime).NotTo(Equal(time.Time{})) + Expect(p.lastStatusPerInstall).NotTo(BeEmpty()) } } @@ -933,9 +934,11 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) { InheritFleetshardSyncImageTag: true, } p := &AddonProvisioner{ - ocmClient: ocmMock, - customizations: initCustomizations(addonConfig), - lastStatus: metrics.AddonUpgrade, + ocmClient: ocmMock, + customizations: initCustomizations(addonConfig), + lastStatusPerInstall: map[string]metrics.AddonStatus{ + "cluster-id:acs-fleetshard": metrics.AddonUpgrade, + }, lastUpgradeRequestTime: time.Now(), } err := p.Provision(api.Cluster{ @@ -950,6 +953,7 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) { }), }, gitops.DataPlaneClusterConfig{ + ClusterID: "cluster-id", Addons: []gitops.AddonConfig{ { ID: "acs-fleetshard",