From b1e45cb6ae8201d715e6e47e4b88bef86b0cf61c Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Thu, 6 Oct 2022 11:40:09 +0200 Subject: [PATCH] Don't retry health check when Unauthorized is returned This fixes unnecessary delays in self-repair when the underlying provider (e.g. the AWS provider) performs periodic kubeconfig refreshes. Without this, the health check is retried 10 times with a 10s interval, meaning that the controller is unable to act for at least 100 seconds, even though it could repair itself immediately. --- controllers/remote/cluster_cache_tracker.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/controllers/remote/cluster_cache_tracker.go b/controllers/remote/cluster_cache_tracker.go index 0b0dcab293eb..86318a477986 100644 --- a/controllers/remote/cluster_cache_tracker.go +++ b/controllers/remote/cluster_cache_tracker.go @@ -484,6 +484,12 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health // If no error occurs, reset the unhealthy counter. _, err := restClient.Get().AbsPath(in.path).Timeout(in.requestTimeout).DoRaw(ctx) if err != nil { + if apierrors.IsUnauthorized(err) { + // Unauthorized means that the credentials in the underlying kubeconfig are no longer accepted, which + // is usually the result of automatic kubeconfig refreshes, meaning that we have to throw away the + // clusterAccessor and rely on the creation of a new one (with a refreshed kubeconfig). + return false, err + } unhealthyCount++ } else { unhealthyCount = 0