Skip to content

Commit

Permalink
ROX-26421: add backoff logic if addon is stuck in upgrading (#2090)
Browse files Browse the repository at this point in the history
* add backoff logic if addon is stuck in upgrading

* PR feedback
  • Loading branch information
johannes94 authored Nov 18, 2024
1 parent 36a5f68 commit b456aa8
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
19 changes: 19 additions & 0 deletions internal/dinosaur/pkg/services/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"time"

"github.com/golang/glog"
clustersmgmtv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
Expand All @@ -20,13 +21,21 @@ import (

const fleetshardImageTagParameter = "fleetshardSyncImageTag"

// This backoff was introduced to prevent reaching OCM service log limits
// by sending too many addon upgrade requests. The limit is 1000. A backoff
// of 20 minutes would result in 504 requests per week, which leaves some space
// for other openshift components to call the API as well.
const addonUpgradeBackoff = 20 * time.Minute

type updateAddonStatusMetricFunc func(addonID, clusterName string, status metrics.AddonStatus)

// AddonProvisioner keeps addon installations on the data plane clusters up-to-date
type AddonProvisioner struct {
ocmClient ocm.Client
customizations []addonCustomization
updateAddonStatusMetricFunc updateAddonStatusMetricFunc
lastStatus metrics.AddonStatus
lastUpgradeRequestTime time.Time
}

// NewAddonProvisioner creates a new instance of AddonProvisioner
Expand Down Expand Up @@ -224,17 +233,27 @@ func (p *AddonProvisioner) newInstallation(config gitops.AddonConfig) (*clusters
}

func (p *AddonProvisioner) updateAddon(clusterID string, config gitops.AddonConfig) error {
if p.backoffUpgradeRequest() {
glog.V(5).Infof("update addon request backoff for cluster: %s", clusterID)
return nil
}

update, err := p.newInstallation(config)
if err != nil {
return err
}
p.lastUpgradeRequestTime = time.Now()
if err := p.ocmClient.UpdateAddonInstallation(clusterID, update); err != nil {
return fmt.Errorf("update addon %s: %w", update.ID(), err)
}
glog.V(5).Infof("Addon %s has been updated on the cluster %s", config.ID, clusterID)
return nil
}

func (p *AddonProvisioner) backoffUpgradeRequest() bool {
return p.lastStatus == metrics.AddonUpgrade && time.Since(p.lastUpgradeRequestTime) < addonUpgradeBackoff
}

func (p *AddonProvisioner) uninstallAddon(clusterID string, addonID string) error {
if err := p.ocmClient.DeleteAddonInstallation(clusterID, addonID); err != nil {
return fmt.Errorf("uninstall addon %s: %w", addonID, err)
Expand Down
70 changes: 70 additions & 0 deletions internal/dinosaur/pkg/services/addon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package services
import (
"encoding/json"
"testing"
"time"

. "github.com/onsi/gomega"
clustersmgmtv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1"
Expand Down Expand Up @@ -661,6 +662,13 @@ func TestAddonProvisioner_Provision(t *testing.T) {
if tt.want != nil {
tt.want(tt.fields.ocmClient)
}

if tt.fields.ocmClient != nil {
if len(tt.fields.ocmClient.UpdateAddonInstallationCalls()) > 0 {
Expect(p.lastUpgradeRequestTime).NotTo(Equal(time.Time{}))
}
}

Expect(updates).To(Equal(tt.wantStatuses))
})
}
Expand Down Expand Up @@ -894,3 +902,65 @@ func TestAddonProvisioner_NewAddonProvisioner(t *testing.T) {
Expect(baseConfigPtr.ClientSecret).To(Equal("base-client-secret"))
Expect(baseConfigPtr.SelfToken).To(Equal("base-token"))
}

func TestAddonProvisioner_Provision_UpgradeBackoff(t *testing.T) {
RegisterTestingT(t)

ocmMock := &ocm.ClientMock{
GetAddonInstallationFunc: func(clusterID string, addonID string) (*clustersmgmtv1.AddOnInstallation, *errors.ServiceError) {
object, err := clustersmgmtv1.NewAddOnInstallation().
ID(addonID).
Addon(clustersmgmtv1.NewAddOn().ID(addonID)).
AddonVersion(clustersmgmtv1.NewAddOnVersion().ID("0.2.0")).
State(clustersmgmtv1.AddOnInstallationStateReady).
Build()
Expect(err).To(Not(HaveOccurred()))
return object, nil
},
GetAddonVersionFunc: func(addonID string, version string) (*clustersmgmtv1.AddOnVersion, error) {
return clustersmgmtv1.NewAddOnVersion().
ID("0.2.0").
SourceImage("quay.io/osd-addons/acs-fleetshard-index@sha256:71eaaccb4d3962043eac953fb3c19a6cc6a88b18c472dd264efc5eb3da4960ac").
PackageImage("quay.io/osd-addons/acs-fleetshard-package@sha256:3e4fc039662b876c83dd4b48a9608d6867a12ab4932c5b7297bfbe50ba8ee61c").
Build()
},
UpdateAddonInstallationFunc: func(clusterID string, addon *clustersmgmtv1.AddOnInstallation) error {
return nil
},
}
addonConfig := ocmImpl.AddonConfig{
FleetshardSyncImageTag: "0307e03",
InheritFleetshardSyncImageTag: true,
}
p := &AddonProvisioner{
ocmClient: ocmMock,
customizations: initCustomizations(addonConfig),
lastStatus: metrics.AddonUpgrade,
lastUpgradeRequestTime: time.Now(),
}
err := p.Provision(api.Cluster{
Addons: addonsJSON([]dbapi.AddonInstallation{
{
ID: "acs-fleetshard",
Version: "0.2.0",
SourceImage: "quay.io/osd-addons/acs-fleetshard-index@sha256:71eaaccb4d3962043eac953fb3c19a6cc6a88b18c472dd264efc5eb3da4960ac",
PackageImage: "quay.io/osd-addons/acs-fleetshard-package@sha256:3e4fc039662b876c83dd4b48a9608d6867a12ab4932c5b7297bfbe50ba8ee61c",
ParametersSHA256Sum: "3e4fc039662b876c83dd4b48a9608d6867a12ab4932c5b7297bfbe50ba8ee61c", // pragma: allowlist secret
},
}),
},
gitops.DataPlaneClusterConfig{
Addons: []gitops.AddonConfig{
{
ID: "acs-fleetshard",
Version: "0.3.0",
Parameters: map[string]string{
"fleetshardSyncImageTag": "inherit",
},
},
},
})

Expect(err).To(Not(HaveOccurred()))
Expect(ocmMock.UpdateAddonInstallationCalls()).To(BeEmpty())
}

0 comments on commit b456aa8

Please sign in to comment.