Skip to content

Commit

Permalink
fix: backoff addon upgrade on a per install basis (#2112)
Browse files Browse the repository at this point in the history
backoff addon upgrade on a per install basis
  • Loading branch information
johannes94 authored Dec 4, 2024
1 parent e80323a commit c4b135c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
31 changes: 22 additions & 9 deletions internal/dinosaur/pkg/services/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ type AddonProvisioner struct {
ocmClient ocm.Client
customizations []addonCustomization
updateAddonStatusMetricFunc updateAddonStatusMetricFunc
lastStatus metrics.AddonStatus
lastUpgradeRequestTime time.Time
// lastStatusPerInstall holds the status for a specific addons installation on a cluster
// the id is clusterid:addonid, it maps to the last status of that install operation
lastStatusPerInstall map[string]metrics.AddonStatus
lastUpgradeRequestTime time.Time
}

// NewAddonProvisioner creates a new instance of AddonProvisioner
Expand All @@ -55,6 +57,7 @@ func NewAddonProvisioner(addonConfig *ocmImpl.AddonConfig, baseConfig *ocmImpl.O
ocmClient: ocmImpl.NewClient(conn),
customizations: initCustomizations(*addonConfig),
updateAddonStatusMetricFunc: metrics.UpdateClusterAddonStatusMetric,
lastStatusPerInstall: map[string]metrics.AddonStatus{},
}, nil
}

Expand Down Expand Up @@ -96,7 +99,7 @@ func (p *AddonProvisioner) Provision(cluster api.Cluster, dataplaneClusterConfig
for _, installedAddon := range installedAddons {
// addon is installed on the cluster but not present in gitops config - uninstall it
errs = append(errs, p.uninstallAddon(cluster.ClusterID, installedAddon.ID))
p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, metrics.AddonHealthy)
p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, cluster.ClusterID, metrics.AddonHealthy)
}

return errors.Join(errs...)
Expand All @@ -113,7 +116,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan
if provisionError != nil {
status = metrics.AddonUnhealthy
}
p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, status)
p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, clusterID, status)
}()

if addonErr != nil {
Expand Down Expand Up @@ -233,7 +236,7 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters
}

func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error {
if p.backoffUpgradeRequest() {
if p.backoffUpgradeRequest(config.ID, clusterID) {
glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID)
return nil
}
Expand All @@ -250,8 +253,9 @@ func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConf
return nil
}

func (p *AddonProvisioner) backoffUpgradeRequest() bool {
return p.lastStatus != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
func (p *AddonProvisioner) backoffUpgradeRequest(addonID string, clusterID string) bool {
id := installID(addonID, clusterID)
return p.lastStatusPerInstall[id] != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
}

func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error {
Expand All @@ -262,11 +266,20 @@ func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) erro
return nil
}

func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, status metrics.AddonStatus) {
func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, clusterID string, status metrics.AddonStatus) {
if p.updateAddonStatusMetricFunc != nil {
p.updateAddonStatusMetricFunc(addonID, clusterName, status)
}
p.lastStatus = status

if p.lastStatusPerInstall == nil {
p.lastStatusPerInstall = map[string]metrics.AddonStatus{}
}

p.lastStatusPerInstall[installID(addonID, clusterID)] = status
}

func installID(addonID string, clusterID string) string {
return fmt.Sprintf("%s:%s", clusterID, addonID)
}

func isFinalState(state clustersmgmtv1.AddOnInstallationState) bool {
Expand Down
10 changes: 7 additions & 3 deletions internal/dinosaur/pkg/services/addon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ func TestAddonProvisioner_Provision(t *testing.T) {
if tt.fields.ocmClient != nil {
if len(tt.fields.ocmClient.UpdateAddonInstallationCalls()) > 0 {
Expect(p.lastUpgradeRequestTime).NotTo(Equal(time.Time{}))
Expect(p.lastStatusPerInstall).NotTo(BeEmpty())
}
}

Expand Down Expand Up @@ -933,9 +934,11 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) {
InheritFleetshardSyncImageTag: true,
}
p := &AddonProvisioner{
ocmClient: ocmMock,
customizations: initCustomizations(addonConfig),
lastStatus: metrics.AddonUpgrade,
ocmClient: ocmMock,
customizations: initCustomizations(addonConfig),
lastStatusPerInstall: map[string]metrics.AddonStatus{
"cluster-id:acs-fleetshard": metrics.AddonUpgrade,
},
lastUpgradeRequestTime: time.Now(),
}
err := p.Provision(api.Cluster{
Expand All @@ -950,6 +953,7 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) {
}),
},
gitops.DataPlaneClusterConfig{
ClusterID: "cluster-id",
Addons: []gitops.AddonConfig{
{
ID: "acs-fleetshard",
Expand Down

0 comments on commit c4b135c

Please sign in to comment.