theketchio · stinkyfingers · Feb 11, 2022 · Feb 8, 2022 · Feb 9, 2022 · Feb 10, 2022
diff --git a/internal/chart/helm_client.go b/internal/chart/helm_client.go
@@ -15,16 +15,58 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
+// statusFunc reports the helm release for appName together with its current status.
+// HelmClient keeps one in its statusFunc field so the lookup can be replaced in tests.
+type statusFunc func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error)
+
+// Actions a helm release status can map to; they are the values of
+// helmStatusActionMapUpdate and helmStatusActionMapDelete.
+const (
+	// WaitRetry means the status is not settled yet and should be polled again.
+	// It is the zero value, so a status missing from an action map resolves to WaitRetry.
+	WaitRetry = iota
+	// TakeAction means the requested update/delete should proceed.
+	TakeAction
+	// NoAction means the requested update/delete should be skipped.
+	NoAction
+)
+
+// helmStatusActionMapUpdate tells a helm update what to do for each Release Status.
+// Entries are grouped by the resulting Ketch action; a status that is not listed
+// resolves to the zero value, WaitRetry.
+var helmStatusActionMapUpdate = map[release.Status]int{
+	// proceed with the update
+	"not-found":               TakeAction,
+	release.StatusDeployed:    TakeAction,
+	release.StatusUninstalled: TakeAction,
+	release.StatusFailed:      TakeAction,
+
+	// poll the status again
+	release.StatusUnknown:         WaitRetry,
+	release.StatusUninstalling:    WaitRetry,
+	release.StatusPendingInstall:  WaitRetry,
+	release.StatusPendingUpgrade:  WaitRetry,
+	release.StatusPendingRollback: WaitRetry,
+
+	// skip the update
+	release.StatusSuperseded: NoAction,
+}
+
+// helmStatusActionMapDelete tells a helm deletion what to do for each Release Status.
+// Entries are grouped by the resulting Ketch action; a status that is not listed
+// resolves to the zero value, WaitRetry.
+var helmStatusActionMapDelete = map[release.Status]int{
+	// proceed with the uninstall
+	release.StatusDeployed: TakeAction,
+
+	// poll the status again
+	release.StatusUnknown:         WaitRetry,
+	release.StatusPendingInstall:  WaitRetry,
+	release.StatusPendingUpgrade:  WaitRetry,
+	release.StatusPendingRollback: WaitRetry,
+
+	// skip the uninstall
+	"not-found":                NoAction,
+	release.StatusFailed:       NoAction,
+	release.StatusSuperseded:   NoAction,
+	release.StatusUninstalled:  NoAction,
+	release.StatusUninstalling: NoAction,
+}
+
+// Polling parameters for waitForActionableStatus: how often the release status is
+// checked and how long to keep checking. They are variables rather than constants
+// so that tests can shorten them.
+var (
+	statusRetryInterval = time.Second
+	statusRetryTimeout  = 5 * time.Second
+)
+
 const (
+	// NOTE(review): presumably the fallback wait applied to chart deployments when the caller
+	// sets none; its use is outside the visible hunks — confirm.
 	defaultDeploymentTimeout = 10 * time.Minute
 )
 
 // HelmClient performs helm install and uninstall operations for provided application helm charts.
 type HelmClient struct {
-	cfg       *action.Configuration
-	namespace string
-	c         client.Client
-	log       logr.Logger
+	cfg        *action.Configuration // helm configuration handed to helm actions and to statusFunc
+	namespace  string                // namespace forwarded to the chart update
+	c          client.Client         // controller-runtime client forwarded to the chart update
+	log        logr.Logger           // logger for status-wait decisions
+	statusFunc statusFunc            // release status lookup; HelmClientFactory sets it to getHelmStatus
 }
 
 // TemplateValuer is an interface that permits types that implement it (e.g. Application, Job)
@@ -105,15 +147,67 @@ func (c HelmClient) UpdateChart(tv TemplateValuer, config ChartConfig, opts ...I
 		namespace: c.namespace,
 		cli:       c.c,
 	}
+	shouldUpdate, err := c.waitForActionableStatus(c.statusFunc, appName, helmStatusActionMapUpdate)
+	if err != nil || !shouldUpdate {
+		return nil, err
+	}
 	return updateClient.Run(appName, chrt, vals)
 }
 
 // DeleteChart uninstalls the app's helm release. It doesn't return an error if the release is not found.
+// Before uninstalling it calls waitForActionableStatus with helmStatusActionMapDelete and returns
+// early, without uninstalling, when that call fails or reports that no delete is needed.
 func (c HelmClient) DeleteChart(appName string) error {
+	shouldDelete, err := c.waitForActionableStatus(c.statusFunc, appName, helmStatusActionMapDelete)
+	if err != nil || !shouldDelete {
+		return err
+	}
 	uninstall := action.NewUninstall(c.cfg)
-	_, err := uninstall.Run(appName)
+	// plain assignment: err is already declared by the status check above
+	_, err = uninstall.Run(appName)
 	if err != nil && errors.Is(err, driver.ErrReleaseNotFound) {
 		return nil
 	}
 	return err
 }
+
+// getHelmStatus looks up the helm release named appName and returns the Release, its Status,
+// and an error.
+//
+// A release that does not exist is not an error: it is reported as a nil release with the
+// pseudo-status "not-found", which the status/action maps use as a key. A release that comes
+// back without Info is reported the same way. Any other lookup failure is returned to the
+// caller with an empty status.
+func getHelmStatus(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
+	rel, err := action.NewStatus(cfg).Run(appName)
+	if errors.Is(err, driver.ErrReleaseNotFound) {
+		return nil, "not-found", nil
+	}
+	if err != nil {
+		// The release may be nil when the lookup fails, so it must not be inspected here.
+		return nil, "", err
+	}
+	if rel == nil || rel.Info == nil {
+		// No status information to act on; treat it like a missing release.
+		return nil, "not-found", nil
+	}
+	return rel, rel.Info.Status, nil
+}
+
+// waitForActionableStatus returns true if the helm status is such that it is ok to proceed with
+// update/delete. It polls statusFunc every statusRetryInterval and looks the reported status up
+// in statusActionMap:
+//
+//   - TakeAction: returns true.
+//   - NoAction: logs and returns false.
+//   - WaitRetry (also the result for a status missing from the map): blocks and retries.
+//
+// If no decision is reached within statusRetryTimeout, the last release seen is marked as
+// deployed and true is returned. That mark is applied to the release value returned by
+// statusFunc; this function itself does not store it anywhere. An error from statusFunc ends
+// the wait and is returned unchanged.
+//
+// The first status check happens one statusRetryInterval after the call, not immediately.
+func (c HelmClient) waitForActionableStatus(statusFunc statusFunc, appName string, statusActionMap map[release.Status]int) (bool, error) {
+	ticker := time.NewTicker(statusRetryInterval)
+	defer ticker.Stop()
+	timeout := time.NewTimer(statusRetryTimeout)
+	defer timeout.Stop()
+
+	var helmRelease *release.Release
+	var status release.Status
+	for {
+		select {
+		case <-timeout.C:
+			c.log.Info(fmt.Sprintf("Setting status (%s) of %s release that has timeouted to: deployed", status, appName))
+			// The timeout can fire before any release was fetched, and SetStatus writes through
+			// Info, so only touch a release that actually carries status information.
+			if helmRelease != nil && helmRelease.Info != nil {
+				helmRelease.SetStatus(release.StatusDeployed, "set manually after timeout waiting for actionable status")
+			}
+			return true, nil
+		case <-ticker.C:
+			var err error
+			helmRelease, status, err = statusFunc(c.cfg, appName)
+			if err != nil {
+				return false, err
+			}
+			// A status missing from the map yields the zero value, WaitRetry, and falls
+			// through to the next tick.
+			switch statusActionMap[status] {
+			case NoAction:
+				c.log.Info(fmt.Sprintf("helm chart for app %s release already in state %s - no action required", appName, status))
+				return false, nil
+			case TakeAction:
+				return true, nil
+			}
+		}
+	}
+}
diff --git a/internal/chart/helm_client_factory.go b/internal/chart/helm_client_factory.go
@@ -51,7 +51,7 @@ func (f *HelmClientFactory) NewHelmClient(namespace string, c client.Client, log
 		f.configurations[namespace] = cfg
 	}
 	f.configurationsLastUsedTimes[namespace] = time.Now()
-	return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace)}, nil
+	return &HelmClient{cfg: cfg, namespace: namespace, c: c, log: log.WithValues("helm-client", namespace), statusFunc: getHelmStatus}, nil
 }
 
 func (f *HelmClientFactory) cleanup() {

diff --git a/internal/chart/helm_client_test.go b/internal/chart/helm_client_test.go
@@ -0,0 +1,117 @@
+package chart
+
+import (
+	"testing"
+	"time"
+
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	"github.com/stretchr/testify/require"
+	"helm.sh/helm/v3/pkg/action"
+	"helm.sh/helm/v3/pkg/chart"
+	"helm.sh/helm/v3/pkg/release"
+)
+
+// TestWaitForActionableStatus drives waitForActionableStatus with a scripted status function and
+// checks both the decision it returns and how many times the status was polled.
+func TestWaitForActionableStatus(t *testing.T) {
+	// Speed up retry and timeout for the whole test, and restore the package-level values
+	// afterwards so other tests in the package are not affected. The timeout sits between the
+	// 5th and 6th tick so the timeout cases observe exactly five polls instead of racing the
+	// 5th tick against the deadline.
+	origInterval, origTimeout := statusRetryInterval, statusRetryTimeout
+	defer func() {
+		statusRetryInterval, statusRetryTimeout = origInterval, origTimeout
+	}()
+	statusRetryInterval = time.Millisecond * 100
+	statusRetryTimeout = time.Millisecond * 550
+
+	tests := []struct {
+		description             string
+		statusFuncMap           map[int]release.Status // sequence of responses returned by mockStatusFunc
+		statusMap               map[release.Status]int // test update or delete
+		expected                bool
+		expectedStatusFuncCalls int
+	}{
+		{
+			description:             "delete - deployed",
+			statusFuncMap:           map[int]release.Status{0: release.StatusDeployed},
+			statusMap:               helmStatusActionMapDelete,
+			expected:                true,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "delete - eventual success",
+			statusFuncMap:           map[int]release.Status{0: release.StatusUnknown, 1: release.StatusDeployed},
+			statusMap:               helmStatusActionMapDelete,
+			expected:                true,
+			expectedStatusFuncCalls: 2,
+		},
+		{
+			description:             "delete - not found",
+			statusFuncMap:           map[int]release.Status{0: "not-found"},
+			statusMap:               helmStatusActionMapDelete,
+			expected:                false,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "delete - superseded",
+			statusFuncMap:           map[int]release.Status{0: release.StatusSuperseded},
+			statusMap:               helmStatusActionMapDelete,
+			expected:                false,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "delete timeout",
+			statusFuncMap:           map[int]release.Status{0: release.StatusPendingInstall, 1: release.StatusPendingInstall, 2: release.StatusPendingInstall, 3: release.StatusPendingInstall, 4: release.StatusPendingInstall},
+			statusMap:               helmStatusActionMapDelete,
+			expected:                true,
+			expectedStatusFuncCalls: 5,
+		},
+		{
+			description:             "update - deployed",
+			statusFuncMap:           map[int]release.Status{0: release.StatusDeployed},
+			statusMap:               helmStatusActionMapUpdate,
+			expected:                true,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "update - eventual success",
+			statusFuncMap:           map[int]release.Status{0: release.StatusUnknown, 1: release.StatusDeployed},
+			statusMap:               helmStatusActionMapUpdate,
+			expected:                true,
+			expectedStatusFuncCalls: 2,
+		},
+		{
+			description:             "update - not found",
+			statusFuncMap:           map[int]release.Status{0: "not-found"},
+			statusMap:               helmStatusActionMapUpdate,
+			expected:                true,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "update - superseded",
+			statusFuncMap:           map[int]release.Status{0: release.StatusSuperseded},
+			statusMap:               helmStatusActionMapUpdate,
+			expected:                false,
+			expectedStatusFuncCalls: 1,
+		},
+		{
+			description:             "update timeout",
+			statusFuncMap:           map[int]release.Status{0: release.StatusPendingInstall, 1: release.StatusPendingInstall, 2: release.StatusPendingInstall, 3: release.StatusPendingInstall, 4: release.StatusPendingInstall},
+			statusMap:               helmStatusActionMapUpdate,
+			expected:                true,
+			expectedStatusFuncCalls: 5,
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.description, func(t *testing.T) {
+			c := &HelmClient{
+				log: log.NullLogger{},
+			}
+			// mockStatusFunc replays tc.statusFuncMap in order; counter tracks times called
+			counter := 0
+			mockStatusFunc := func(cfg *action.Configuration, appName string) (*release.Release, release.Status, error) {
+				status := tc.statusFuncMap[counter]
+				counter++
+				mockRelease := &release.Release{Chart: &chart.Chart{}, Info: &release.Info{}}
+				return mockRelease, status, nil
+			}
+
+			ok, err := c.waitForActionableStatus(mockStatusFunc, "testapp", tc.statusMap)
+			require.NoError(t, err)
+			require.Equal(t, tc.expected, ok)
+			require.Equal(t, tc.expectedStatusFuncCalls, counter)
+		})
+	}
+}