Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: backoff addon upgrade on a per install basis #2112

Merged
merged 1 commit into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions internal/dinosaur/pkg/services/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ type AddonProvisioner struct {
ocmClient ocm.Client
customizations []addonCustomization
updateAddonStatusMetricFunc updateAddonStatusMetricFunc
lastStatus metrics.AddonStatus
lastUpgradeRequestTime time.Time
// lastStatusPerInstall holds the status for a specific addons installation on a cluster
// the id is clusterid:addonid, it maps to the last status of that install operation
lastStatusPerInstall map[string]metrics.AddonStatus
lastUpgradeRequestTime time.Time
}

// NewAddonProvisioner creates a new instance of AddonProvisioner
Expand All @@ -55,6 +57,7 @@ func NewAddonProvisioner(addonConfig *ocmImpl.AddonConfig, baseConfig *ocmImpl.O
ocmClient: ocmImpl.NewClient(conn),
customizations: initCustomizations(*addonConfig),
updateAddonStatusMetricFunc: metrics.UpdateClusterAddonStatusMetric,
lastStatusPerInstall: map[string]metrics.AddonStatus{},
}, nil
}

Expand Down Expand Up @@ -96,7 +99,7 @@ func (p *AddonProvisioner) Provision(cluster api.Cluster, dataplaneClusterConfig
for _, installedAddon := range installedAddons {
// addon is installed on the cluster but not present in gitops config - uninstall it
errs = append(errs, p.uninstallAddon(cluster.ClusterID, installedAddon.ID))
p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, metrics.AddonHealthy)
p.updateAddonStatus(installedAddon.ID, dataplaneClusterConfig.ClusterName, cluster.ClusterID, metrics.AddonHealthy)
}

return errors.Join(errs...)
Expand All @@ -113,7 +116,7 @@ func (p *AddonProvisioner) provisionAddon(dataplaneClusterConfig gitops.DataPlan
if provisionError != nil {
status = metrics.AddonUnhealthy
}
p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, status)
p.updateAddonStatus(expectedConfig.ID, dataplaneClusterConfig.ClusterName, clusterID, status)
}()

if addonErr != nil {
Expand Down Expand Up @@ -233,7 +236,7 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters
}

func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error {
if p.backoffUpgradeRequest() {
if p.backoffUpgradeRequest(config.ID, clusterID) {
glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID)
return nil
}
Expand All @@ -250,8 +253,9 @@ func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConf
return nil
}

func (p *AddonProvisioner) backoffUpgradeRequest() bool {
return p.lastStatus != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
func (p *AddonProvisioner) backoffUpgradeRequest(addonID string, clusterID string) bool {
id := installID(addonID, clusterID)
return p.lastStatusPerInstall[id] != metrics.AddonHealthy && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
}

func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error {
Expand All @@ -262,11 +266,20 @@ func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) erro
return nil
}

func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, status metrics.AddonStatus) {
func (p *AddonProvisioner) updateAddonStatus(addonID string, clusterName string, clusterID string, status metrics.AddonStatus) {
if p.updateAddonStatusMetricFunc != nil {
p.updateAddonStatusMetricFunc(addonID, clusterName, status)
}
p.lastStatus = status

if p.lastStatusPerInstall == nil {
p.lastStatusPerInstall = map[string]metrics.AddonStatus{}
}

p.lastStatusPerInstall[installID(addonID, clusterID)] = status
}

func installID(addonID string, clusterID string) string {
return fmt.Sprintf("%s:%s", clusterID, addonID)
}

func isFinalState(state clustersmgmtv1.AddOnInstallationState) bool {
Expand Down
10 changes: 7 additions & 3 deletions internal/dinosaur/pkg/services/addon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ func TestAddonProvisioner_Provision(t *testing.T) {
if tt.fields.ocmClient != nil {
if len(tt.fields.ocmClient.UpdateAddonInstallationCalls()) > 0 {
Expect(p.lastUpgradeRequestTime).NotTo(Equal(time.Time{}))
Expect(p.lastStatusPerInstall).NotTo(BeEmpty())
}
}

Expand Down Expand Up @@ -933,9 +934,11 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) {
InheritFleetshardSyncImageTag: true,
}
p := &AddonProvisioner{
ocmClient: ocmMock,
customizations: initCustomizations(addonConfig),
lastStatus: metrics.AddonUpgrade,
ocmClient: ocmMock,
customizations: initCustomizations(addonConfig),
lastStatusPerInstall: map[string]metrics.AddonStatus{
"cluster-id:acs-fleetshard": metrics.AddonUpgrade,
},
lastUpgradeRequestTime: time.Now(),
}
err := p.Provision(api.Cluster{
Expand All @@ -950,6 +953,7 @@ func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) {
}),
},
gitops.DataPlaneClusterConfig{
ClusterID: "cluster-id",
Addons: []gitops.AddonConfig{
{
ID: "acs-fleetshard",
Expand Down
Loading