Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SHIPA-2322] adds wait-retry to helm update & delete #233

Merged
merged 4 commits into from
Feb 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 95 additions & 27 deletions internal/chart/helm_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,55 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

type statusFunc func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error)

const (
waitRetry = iota
takeAction
noAction

notFound release.Status = "not-found"
)

// helmStatusActionMapUpdate maps a Release Status to a Ketch action for helm updates
var helmStatusActionMapUpdate = map[release.Status]int{
notFound: takeAction,
release.StatusUnknown: waitRetry,
release.StatusDeployed: takeAction,
release.StatusUninstalled: takeAction,
release.StatusSuperseded: noAction,
release.StatusFailed: takeAction,
release.StatusUninstalling: waitRetry,
release.StatusPendingInstall: waitRetry,
release.StatusPendingUpgrade: waitRetry,
release.StatusPendingRollback: waitRetry,
}

// helmStatusActionMapDelete maps a Release Status to a Ketch action for helm deletions
var helmStatusActionMapDelete = map[release.Status]int{
notFound: noAction,
release.StatusUnknown: waitRetry,
release.StatusDeployed: takeAction,
release.StatusUninstalled: noAction,
release.StatusSuperseded: noAction,
release.StatusFailed: noAction,
release.StatusUninstalling: noAction,
release.StatusPendingInstall: waitRetry,
release.StatusPendingUpgrade: waitRetry,
release.StatusPendingRollback: waitRetry,
}

const (
defaultDeploymentTimeout = 10 * time.Minute
)

// HelmClient performs helm install and uninstall operations for provided application helm charts.
type HelmClient struct {
cfg *action.Configuration
namespace string
c client.Client
log logr.Logger
cfg *action.Configuration
namespace string
c client.Client
log logr.Logger
statusFunc statusFunc
}

// TemplateValuer is an interface that permits types that implement it (e.g. Application, Job)
Expand Down Expand Up @@ -75,28 +114,6 @@ func (c HelmClient) UpdateChart(tv TemplateValuer, config ChartConfig, opts ...I
updateClient := action.NewUpgrade(c.cfg)
updateClient.Namespace = c.namespace

// we check releases and if lastRelease release was a while back, we set its status to Failure
// next helm update won't be blocked in that case
// this is an edge case we should cover when ketch controller pod gets restarted in middle of deploying and app
// in that case if helm secret (release) remains it would block next deployment forever
lastRelease, err := c.cfg.Releases.Last(appName)
if err != nil && err != driver.ErrReleaseNotFound {
return nil, err
}
if lastRelease != nil {
if lastRelease.Info.Status.IsPending() {
c.log.Info(fmt.Sprintf("Found pending helm release: %d", lastRelease.Version))
timeoutLimit := time.Now().Add(-defaultDeploymentTimeout)
if lastRelease.Info.FirstDeployed.Before(helmTime.Time{Time: timeoutLimit}) {
newStatus := release.StatusDeployed
c.log.Info(fmt.Sprintf("Setting status of release that has timeouted to: %s", newStatus))
lastRelease.SetStatus(newStatus, "manually canceled")
if err := c.cfg.Releases.Update(lastRelease); err != nil {
return nil, err
}
}
}
}
// MaxHistory specifies the maximum number of historical releases that will be retained, including the most recent release.
// Values of 0 or less are ignored (meaning no limits are imposed).
// Let's set it to minimal to disable "helm rollback".
Expand All @@ -105,15 +122,66 @@ func (c HelmClient) UpdateChart(tv TemplateValuer, config ChartConfig, opts ...I
namespace: c.namespace,
cli: c.c,
}
shouldUpdate, err := c.isHelmChartStatusActionable(c.statusFunc, appName, helmStatusActionMapUpdate)
if err != nil || !shouldUpdate {
return nil, err
}
return updateClient.Run(appName, chrt, vals)
}

// DeleteChart uninstalls the app's helm release. It doesn't return an error if the release is not found.
func (c HelmClient) DeleteChart(appName string) error {
shouldDelete, err := c.isHelmChartStatusActionable(c.statusFunc, appName, helmStatusActionMapDelete)
if err != nil || !shouldDelete {
return err
}
uninstall := action.NewUninstall(c.cfg)
_, err := uninstall.Run(appName)
_, err = uninstall.Run(appName)
if err != nil && errors.Is(err, driver.ErrReleaseNotFound) {
return nil
}
return err
}

// getHelmStatus returns the latest Release, Status, and error for an app
func getHelmStatus(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
statusClient := action.NewStatus(cfg)
status, err := statusClient.Run(appName)
if err != nil {
if errors.Is(err, driver.ErrReleaseNotFound) || status.Info == nil {
return nil, notFound, nil
}
return nil, "", err
}
return status, status.Info.Status, nil
}

// isHelmChartStatusActionable returns true if the statusFunc returns an actionable status according to the statusActionMap, false if the status is
// non-actionable (e.g. "not-found" status for a "delete" action), and an error if the status requires a wait-retry. The retry is expected to be
// executed by the calling reconciler's inherent looping.
func (c HelmClient) isHelmChartStatusActionable(statusFunc statusFunc, appName string, statusActionMap map[release.Status]int) (bool, error) {
lastRelease, status, err := statusFunc(c.cfg, appName)
if err != nil {
return false, err
}
switch statusActionMap[status] {
case noAction:
c.log.Info(fmt.Sprintf("helm chart for app %s release already in state %s - no action required", appName, status))
return false, nil
case takeAction:
return true, nil
default:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would it help if we implement something like

if lastRelease.Info.FirstDeployed.Before(helmTime.Time{Time: timeoutLimit}) {
newStatus := release.StatusDeployed
c.log.Info(fmt.Sprintf("Setting status of release that has timeouted to: %s", newStatus))
lastRelease.SetStatus(newStatus, "manually canceled")
if err := c.cfg.Releases.Update(lastRelease); err != nil {
return nil, err
}
}

i

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that makes sense. This PR's addition almost duplicates that FirstDeployed.Before check, but considers additional statuses. I removed the original block of code.

c.log.Info(fmt.Sprintf("Found pending helm release: %d", lastRelease.Version))
timeoutLimit := time.Now().Add(-defaultDeploymentTimeout)
if lastRelease.Info.FirstDeployed.Before(helmTime.Time{Time: timeoutLimit}) {
newStatus := release.StatusDeployed
c.log.Info(fmt.Sprintf("Setting status of release that has timeouted to: %s", newStatus))
lastRelease.SetStatus(newStatus, "manually canceled")
if err := c.cfg.Releases.Update(lastRelease); err != nil {
return false, err
}
return true, nil
}
return false, fmt.Errorf("helm chart for app %s in non-actionable status %s", appName, status)
}
}
2 changes: 1 addition & 1 deletion internal/chart/helm_client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (f *HelmClientFactory) NewHelmClient(namespace string, c client.Client, log
f.configurations[namespace] = cfg
}
f.configurationsLastUsedTimes[namespace] = time.Now()
return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace)}, nil
return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace), statusFunc: getHelmStatus}, nil
}

func (f *HelmClientFactory) cleanup() {
Expand Down
93 changes: 93 additions & 0 deletions internal/chart/helm_client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package chart

import (
"testing"
"time"

"github.com/stretchr/testify/require"
"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/release"
helmTime "helm.sh/helm/v3/pkg/time"
"sigs.k8s.io/controller-runtime/pkg/log"
)

func TestIsHelmChartStatusActionable(t *testing.T) {
tests := []struct {
description string
status release.Status
statusMap map[release.Status]int // helmStatusActionMapUpdate or helmStatusActionMapDelete
expected bool
expectedError string
}{
{
description: "update - deployed",
status: release.StatusDeployed,
statusMap: helmStatusActionMapUpdate,
expected: true,
},
{
description: "update - not found",
status: notFound,
statusMap: helmStatusActionMapUpdate,
expected: true,
},
{
description: "update - unknown",
status: release.StatusUnknown,
statusMap: helmStatusActionMapUpdate,
expected: false,
expectedError: "helm chart for app testapp in non-actionable status unknown",
},
{
description: "update - superseded",
status: release.StatusSuperseded,
statusMap: helmStatusActionMapUpdate,
expected: false,
},
{
description: "delete - deployed",
status: release.StatusDeployed,
statusMap: helmStatusActionMapDelete,
expected: true,
},
{
description: "delete - not found",
status: notFound,
statusMap: helmStatusActionMapDelete,
expected: false,
},
{
description: "delete - unknown",
status: release.StatusUnknown,
statusMap: helmStatusActionMapDelete,
expected: false,
expectedError: "helm chart for app testapp in non-actionable status unknown",
},
{
description: "delete - superseded",
status: release.StatusSuperseded,
statusMap: helmStatusActionMapDelete,
expected: false,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
c := &HelmClient{
log: log.NullLogger{},
}
mockStatusFunc := func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
status := tc.status
currentRelease := &release.Release{Info: &release.Info{FirstDeployed: helmTime.Time{Time: time.Now()}}}
return currentRelease, status, nil
}

ok, err := c.isHelmChartStatusActionable(mockStatusFunc, "testapp", tc.statusMap)
if tc.expectedError != "" {
require.EqualError(t, err, tc.expectedError)
} else {
require.Nil(t, err)
}
require.Equal(t, tc.expected, ok)
})
}
}