Skip to content

Commit

Permalink
Remove stale Ready=False conditions values
Browse files Browse the repository at this point in the history
When the reconciliation begins, while fulfilling the prerequisites,
Ready=False condition for various reasons are added on the object. On
failure, this reason is persisted on the object. On a subsequent
reconciliation, when the failure is recovered, the Ready=False condition
is not updates until the atomic reconciliation reaches a conclusion.
During this period if the atomic reconciliation enters a retry loop due
to constant drift detection and correction, the stale Ready=False
condition with incorrect reason persists on the object. The Ready=False
message is also copied to Reconciling=True condition, resulting in an
incorrect depiction of what's actually happening.
For example, if previously the HelmRelease failed with dependency not
ready error, on a subsequent reconciliation, even after going past the
dependency check and returning from atomic reconciliation due to drift
detection and correction loop scenario, the Ready=False condition
continues to show the stale dependency not ready error.

In order to show more accurate status, the Ready=False conditions added
while fulfilling prerequisites can be removed once those checks have
succeeded, updating Ready=False to Ready=Unknown with "reconciliation in
progress" message. If the atomic reconciliation gets stuck in the drift
detection and correction loop with this, the Ready and Reconciling
conditons would show "reconciliation in progress". This should be a
better indicator of what's going on. The events and logs can be checked
to determine accurately what's causing the reconciliation to be
progressing for ever.

Signed-off-by: Sunny <[email protected]>
  • Loading branch information
darkowlzz committed Feb 5, 2024
1 parent d370e73 commit 59c577a
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ require (
github.com/fluxcd/pkg/apis/event v0.7.0
github.com/fluxcd/pkg/apis/kustomize v1.3.0
github.com/fluxcd/pkg/apis/meta v1.3.0
github.com/fluxcd/pkg/runtime v0.44.0
github.com/fluxcd/pkg/runtime v0.44.1
github.com/fluxcd/pkg/ssa v0.36.0
github.com/fluxcd/pkg/testserver v0.5.0
github.com/fluxcd/source-controller/api v1.2.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ github.com/fluxcd/pkg/apis/kustomize v1.3.0 h1:qvB46CfaOWcL1SyR2RiVWN/j7/035D0Ot
github.com/fluxcd/pkg/apis/kustomize v1.3.0/go.mod h1:PCXf5kktTzNav0aH2Ns3jsowqwmA9xTcsrEOoPzx/K8=
github.com/fluxcd/pkg/apis/meta v1.3.0 h1:KxeEc6olmSZvQ5pBONPE4IKxyoWQbqTJF1X6K5nIXpU=
github.com/fluxcd/pkg/apis/meta v1.3.0/go.mod h1:3Ui8xFkoU4sYehqmscjpq7NjqH2YN1A2iX2okbO3/yA=
github.com/fluxcd/pkg/runtime v0.44.0 h1:0BEPSpcsYXOiswKG5TWkin8fhCDHb0nDdAtq/5VrCSI=
github.com/fluxcd/pkg/runtime v0.44.0/go.mod h1:s1AhSOTCEBPaTfz/GdBD/Ws66uOByIuNP4Znrq+is9M=
github.com/fluxcd/pkg/runtime v0.44.1 h1:XuPTcNIgn/NsoIo/A6qfPZaD9E7cbnJTDbeNw8O1SZQ=
github.com/fluxcd/pkg/runtime v0.44.1/go.mod h1:s1AhSOTCEBPaTfz/GdBD/Ws66uOByIuNP4Znrq+is9M=
github.com/fluxcd/pkg/ssa v0.36.0 h1:h9FB6SrrdVlxNQtfG+Fb/Roe1e61EPgtmJ5ORlAxwkU=
github.com/fluxcd/pkg/ssa v0.36.0/go.mod h1:FJj4xznwBvRM+9h02lGGC0CGYGucPeXO7P6NEPphbys=
github.com/fluxcd/pkg/testserver v0.5.0 h1:n/Iskk0tXNt2AgIgjz9qeFK/VhEXGfqeazABXZmO2Es=
Expand Down
28 changes: 28 additions & 0 deletions internal/controller/helmrelease_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe

log.Info("all dependencies are ready")
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, v2.DependencyNotReadyReason) {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Get the HelmChart object for the release.
hc, err := r.getHelmChart(ctx, obj)
Expand All @@ -266,6 +270,10 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
conditions.MarkFalse(obj, meta.ReadyCondition, v2.ArtifactFailedReason, msg)
return ctrl.Result{}, err
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, aclv1.AccessDeniedReason, v2.ArtifactFailedReason) {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Check if the HelmChart is ready.
if ready, reason := isHelmChartReady(hc); !ready {
Expand All @@ -276,13 +284,21 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
// the watcher should trigger a reconciliation.
return jitter.JitteredRequeueInterval(ctrl.Result{RequeueAfter: obj.GetRequeueAfter()}), errWaitForChart
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, "HelmChartNotReady") {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Compose values based from the spec and references.
values, err := chartutil.ChartValuesFromReferences(ctx, r.Client, obj.Namespace, obj.GetValues(), obj.Spec.ValuesFrom...)
if err != nil {
conditions.MarkFalse(obj, meta.ReadyCondition, "ValuesError", err.Error())
return ctrl.Result{}, err
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, "ValuesError") {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Load chart from artifact.
loadedChart, err := loader.SecureLoadChartFromURL(loader.NewRetryableHTTPClient(ctx, r.artifactFetchRetries), hc.GetArtifact().URL, hc.GetArtifact().Digest)
Expand All @@ -297,13 +313,21 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
conditions.MarkFalse(obj, meta.ReadyCondition, v2.ArtifactFailedReason, fmt.Sprintf("Could not load chart: %s", err.Error()))
return ctrl.Result{}, err
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, v2.ArtifactFailedReason) {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Build the REST client getter.
getter, err := r.buildRESTClientGetter(ctx, obj)
if err != nil {
conditions.MarkFalse(obj, meta.ReadyCondition, "RESTClientError", err.Error())
return ctrl.Result{}, err
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, "RESTClientError") {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Attempt to adopt "legacy" v2beta1 release state on a best-effort basis.
// If this fails, the controller will fall back to performing an upgrade
Expand Down Expand Up @@ -354,6 +378,10 @@ func (r *HelmReleaseReconciler) reconcileRelease(ctx context.Context, patchHelpe
conditions.MarkFalse(obj, meta.ReadyCondition, "FactoryError", err.Error())
return ctrl.Result{}, err
}
// Remove any stale corresponding Ready=False condition with Unknown.
if conditions.HasAnyReason(obj, meta.ReadyCondition, "FactoryError") {
conditions.MarkUnknown(obj, meta.ReadyCondition, meta.ProgressingReason, "reconciliation in progress")
}

// Off we go!
if err = intreconcile.NewAtomicRelease(patchHelper, cfg, r.EventRecorder, r.FieldManager).Reconcile(ctx, &intreconcile.Request{
Expand Down
122 changes: 122 additions & 0 deletions internal/controller/helmrelease_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"sigs.k8s.io/yaml"

"github.com/fluxcd/pkg/apis/acl"
aclv1 "github.com/fluxcd/pkg/apis/acl"
"github.com/fluxcd/pkg/apis/meta"
"github.com/fluxcd/pkg/runtime/conditions"
feathelper "github.com/fluxcd/pkg/runtime/features"
Expand Down Expand Up @@ -762,6 +763,127 @@ func TestHelmReleaseReconciler_reconcileRelease(t *testing.T) {
g.Expect(obj.Status.LastAttemptedConfigDigest).To(Equal("sha256:1dabc4e3cbbd6a0818bd460f3a6c9855bfe95d506c74726bc0f2edb0aecb1f4e"))
g.Expect(obj.Status.LastAttemptedValuesChecksum).To(BeEmpty())
})

t.Run("error recovery updates ready condition", func(t *testing.T) {
g := NewWithT(t)

// Create a test namespace for storing the Helm release mock.
ns, err := testEnv.CreateNamespace(context.TODO(), "error-recovery")
g.Expect(err).ToNot(HaveOccurred())
t.Cleanup(func() {
_ = testEnv.Delete(context.TODO(), ns)
})

// Create HelmChart mock.
chartMock := testutil.BuildChart()
chartArtifact, err := testutil.SaveChartAsArtifact(chartMock, digest.SHA256, testServer.URL(), testServer.Root())
g.Expect(err).ToNot(HaveOccurred())

chart := &sourcev1b2.HelmChart{
ObjectMeta: metav1.ObjectMeta{
Name: "error-recovery",
Namespace: ns.Name,
Generation: 1,
},
Spec: sourcev1b2.HelmChartSpec{
Chart: "testdata/test-helmrepo",
Version: "0.1.0",
SourceRef: sourcev1b2.LocalHelmChartSourceReference{
Kind: sourcev1b2.HelmRepositoryKind,
Name: "error-recovery",
},
},
Status: sourcev1b2.HelmChartStatus{
ObservedGeneration: 1,
Artifact: chartArtifact,
Conditions: []metav1.Condition{
{
Type: meta.ReadyCondition,
Status: metav1.ConditionTrue,
},
},
},
}

// Create a test Helm release storage mock.
rls := testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: "error-recovery",
Namespace: ns.Name,
Version: 1,
Chart: chartMock,
Status: helmrelease.StatusDeployed,
}, testutil.ReleaseWithConfig(nil))

obj := &v2.HelmRelease{
ObjectMeta: metav1.ObjectMeta{
Name: "error-recovery",
Namespace: ns.Name,
},
Spec: v2.HelmReleaseSpec{
StorageNamespace: ns.Name,
},
Status: v2.HelmReleaseStatus{
HelmChart: chart.Namespace + "/" + chart.Name,
History: v2.Snapshots{
release.ObservedToSnapshot(release.ObserveRelease(rls)),
},
},
}

c := fake.NewClientBuilder().
WithScheme(NewTestScheme()).
WithStatusSubresource(&v2.HelmRelease{}).
WithObjects(chart, obj).
Build()

r := &HelmReleaseReconciler{
Client: c,
GetClusterConfig: GetTestClusterConfig,
EventRecorder: record.NewFakeRecorder(32),
FieldManager: "test",
}

// Store the Helm release mock in the test namespace.
getter, err := r.buildRESTClientGetter(context.TODO(), obj)
g.Expect(err).ToNot(HaveOccurred())

cfg, err := action.NewConfigFactory(getter, action.WithStorage(helmdriver.SecretsDriverName, obj.GetStorageNamespace()))
g.Expect(err).ToNot(HaveOccurred())

store := helmstorage.Init(cfg.Driver)
g.Expect(store.Create(rls)).To(Succeed())

sp := patch.NewSerialPatcher(obj, r.Client)

// List of failure reasons to test.
prereqFailures := []string{
v2.DependencyNotReadyReason,
aclv1.AccessDeniedReason,
v2.ArtifactFailedReason,
"HelmChartNotReady",
"ValuesError",
"RESTClientError",
"FactoryError",
}

// Update ready condition for each failure, reconcile and check if the
// stale failure condition gets updated.
for _, failReason := range prereqFailures {
conditions.MarkFalse(obj, meta.ReadyCondition, failReason, "foo")
err := sp.Patch(context.TODO(), obj,
patch.WithOwnedConditions{Conditions: intreconcile.OwnedConditions},
patch.WithFieldOwner(r.FieldManager),
)
g.Expect(err).ToNot(HaveOccurred())

_, err = r.reconcileRelease(context.TODO(), sp, obj)
g.Expect(err).ToNot(HaveOccurred())

ready := conditions.Get(obj, meta.ReadyCondition)
g.Expect(ready.Status).To(Equal(metav1.ConditionUnknown))
g.Expect(ready.Reason).To(Equal(meta.ProgressingReason))
}
})
}

func TestHelmReleaseReconciler_reconcileDelete(t *testing.T) {
Expand Down

0 comments on commit 59c577a

Please sign in to comment.