Skip to content

Commit

Permalink
[SHIPA-2322] adds wait-retry to helm update & delete (#233)
Browse files Browse the repository at this point in the history
* adds wait-retry to helm update & delete

clean up tests

* avoids wait loop; returns error on non-actionable statuses; clean up const labels

* adds default timeout/update to helm chart checks

* rm duplicated code
  • Loading branch information
stinkyfingers authored Feb 11, 2022
1 parent 4b313a6 commit a6830cd
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 28 deletions.
122 changes: 95 additions & 27 deletions internal/chart/helm_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,55 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

type statusFunc func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error)

const (
waitRetry = iota
takeAction
noAction

notFound release.Status = "not-found"
)

// helmStatusActionMapUpdate maps a Release Status to a Ketch action for helm updates
var helmStatusActionMapUpdate = map[release.Status]int{
notFound: takeAction,
release.StatusUnknown: waitRetry,
release.StatusDeployed: takeAction,
release.StatusUninstalled: takeAction,
release.StatusSuperseded: noAction,
release.StatusFailed: takeAction,
release.StatusUninstalling: waitRetry,
release.StatusPendingInstall: waitRetry,
release.StatusPendingUpgrade: waitRetry,
release.StatusPendingRollback: waitRetry,
}

// helmStatusActionMapDelete maps a Release Status to a Ketch action for helm deletions
var helmStatusActionMapDelete = map[release.Status]int{
notFound: noAction,
release.StatusUnknown: waitRetry,
release.StatusDeployed: takeAction,
release.StatusUninstalled: noAction,
release.StatusSuperseded: noAction,
release.StatusFailed: noAction,
release.StatusUninstalling: noAction,
release.StatusPendingInstall: waitRetry,
release.StatusPendingUpgrade: waitRetry,
release.StatusPendingRollback: waitRetry,
}

const (
defaultDeploymentTimeout = 10 * time.Minute
)

// HelmClient performs helm install and uninstall operations for provided application helm charts.
type HelmClient struct {
cfg *action.Configuration
namespace string
c client.Client
log logr.Logger
cfg *action.Configuration
namespace string
c client.Client
log logr.Logger
statusFunc statusFunc
}

// TemplateValuer is an interface that permits types that implement it (e.g. Application, Job)
Expand Down Expand Up @@ -75,28 +114,6 @@ func (c HelmClient) UpdateChart(tv TemplateValuer, config ChartConfig, opts ...I
updateClient := action.NewUpgrade(c.cfg)
updateClient.Namespace = c.namespace

// we check releases and if lastRelease release was a while back, we set its status to Failure
// next helm update won't be blocked in that case
// this is an edge case we should cover when ketch controller pod gets restarted in middle of deploying and app
// in that case if helm secret (release) remains it would block next deployment forever
lastRelease, err := c.cfg.Releases.Last(appName)
if err != nil && err != driver.ErrReleaseNotFound {
return nil, err
}
if lastRelease != nil {
if lastRelease.Info.Status.IsPending() {
c.log.Info(fmt.Sprintf("Found pending helm release: %d", lastRelease.Version))
timeoutLimit := time.Now().Add(-defaultDeploymentTimeout)
if lastRelease.Info.FirstDeployed.Before(helmTime.Time{Time: timeoutLimit}) {
newStatus := release.StatusDeployed
c.log.Info(fmt.Sprintf("Setting status of release that has timeouted to: %s", newStatus))
lastRelease.SetStatus(newStatus, "manually canceled")
if err := c.cfg.Releases.Update(lastRelease); err != nil {
return nil, err
}
}
}
}
// MaxHistory specifies the maximum number of historical releases that will be retained, including the most recent release.
// Values of 0 or less are ignored (meaning no limits are imposed).
// Let's set it to minimal to disable "helm rollback".
Expand All @@ -105,15 +122,66 @@ func (c HelmClient) UpdateChart(tv TemplateValuer, config ChartConfig, opts ...I
namespace: c.namespace,
cli: c.c,
}
shouldUpdate, err := c.isHelmChartStatusActionable(c.statusFunc, appName, helmStatusActionMapUpdate)
if err != nil || !shouldUpdate {
return nil, err
}
return updateClient.Run(appName, chrt, vals)
}

// DeleteChart uninstalls the app's helm release. It doesn't return an error if the release is not found.
func (c HelmClient) DeleteChart(appName string) error {
shouldDelete, err := c.isHelmChartStatusActionable(c.statusFunc, appName, helmStatusActionMapDelete)
if err != nil || !shouldDelete {
return err
}
uninstall := action.NewUninstall(c.cfg)
_, err := uninstall.Run(appName)
_, err = uninstall.Run(appName)
if err != nil && errors.Is(err, driver.ErrReleaseNotFound) {
return nil
}
return err
}

// getHelmStatus returns the latest Release, Status, and error for an app
func getHelmStatus(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
statusClient := action.NewStatus(cfg)
status, err := statusClient.Run(appName)
if err != nil {
if errors.Is(err, driver.ErrReleaseNotFound) || status.Info == nil {
return nil, notFound, nil
}
return nil, "", err
}
return status, status.Info.Status, nil
}

// isHelmChartStatusActionable returns true if the statusFunc returns an actionable status according to the statusActionMap, false if the status is
// non-actionable (e.g. "not-found" status for a "delete" action), and an error if the status requires a wait-retry. The retry is expected to be
// executed by the calling reconciler's inherent looping.
func (c HelmClient) isHelmChartStatusActionable(statusFunc statusFunc, appName string, statusActionMap map[release.Status]int) (bool, error) {
lastRelease, status, err := statusFunc(c.cfg, appName)
if err != nil {
return false, err
}
switch statusActionMap[status] {
case noAction:
c.log.Info(fmt.Sprintf("helm chart for app %s release already in state %s - no action required", appName, status))
return false, nil
case takeAction:
return true, nil
default:
c.log.Info(fmt.Sprintf("Found pending helm release: %d", lastRelease.Version))
timeoutLimit := time.Now().Add(-defaultDeploymentTimeout)
if lastRelease.Info.FirstDeployed.Before(helmTime.Time{Time: timeoutLimit}) {
newStatus := release.StatusDeployed
c.log.Info(fmt.Sprintf("Setting status of release that has timeouted to: %s", newStatus))
lastRelease.SetStatus(newStatus, "manually canceled")
if err := c.cfg.Releases.Update(lastRelease); err != nil {
return false, err
}
return true, nil
}
return false, fmt.Errorf("helm chart for app %s in non-actionable status %s", appName, status)
}
}
2 changes: 1 addition & 1 deletion internal/chart/helm_client_factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (f *HelmClientFactory) NewHelmClient(namespace string, c client.Client, log
f.configurations[namespace] = cfg
}
f.configurationsLastUsedTimes[namespace] = time.Now()
return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace)}, nil
return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace), statusFunc: getHelmStatus}, nil
}

func (f *HelmClientFactory) cleanup() {
Expand Down
93 changes: 93 additions & 0 deletions internal/chart/helm_client_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package chart

import (
"testing"
"time"

"github.com/stretchr/testify/require"
"helm.sh/helm/v3/pkg/action"
"helm.sh/helm/v3/pkg/release"
helmTime "helm.sh/helm/v3/pkg/time"
"sigs.k8s.io/controller-runtime/pkg/log"
)

func TestIsHelmChartStatusActionable(t *testing.T) {
tests := []struct {
description string
status release.Status
statusMap map[release.Status]int // helmStatusActionMapUpdate or helmStatusActionMapDelete
expected bool
expectedError string
}{
{
description: "update - deployed",
status: release.StatusDeployed,
statusMap: helmStatusActionMapUpdate,
expected: true,
},
{
description: "update - not found",
status: notFound,
statusMap: helmStatusActionMapUpdate,
expected: true,
},
{
description: "update - unknown",
status: release.StatusUnknown,
statusMap: helmStatusActionMapUpdate,
expected: false,
expectedError: "helm chart for app testapp in non-actionable status unknown",
},
{
description: "update - superseded",
status: release.StatusSuperseded,
statusMap: helmStatusActionMapUpdate,
expected: false,
},
{
description: "delete - deployed",
status: release.StatusDeployed,
statusMap: helmStatusActionMapDelete,
expected: true,
},
{
description: "delete - not found",
status: notFound,
statusMap: helmStatusActionMapDelete,
expected: false,
},
{
description: "delete - unknown",
status: release.StatusUnknown,
statusMap: helmStatusActionMapDelete,
expected: false,
expectedError: "helm chart for app testapp in non-actionable status unknown",
},
{
description: "delete - superseded",
status: release.StatusSuperseded,
statusMap: helmStatusActionMapDelete,
expected: false,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
c := &HelmClient{
log: log.NullLogger{},
}
mockStatusFunc := func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
status := tc.status
currentRelease := &release.Release{Info: &release.Info{FirstDeployed: helmTime.Time{Time: time.Now()}}}
return currentRelease, status, nil
}

ok, err := c.isHelmChartStatusActionable(mockStatusFunc, "testapp", tc.statusMap)
if tc.expectedError != "" {
require.EqualError(t, err, tc.expectedError)
} else {
require.Nil(t, err)
}
require.Equal(t, tc.expected, ok)
})
}
}

0 comments on commit a6830cd

Please sign in to comment.