From b1e45cb6ae8201d715e6e47e4b88bef86b0cf61c Mon Sep 17 00:00:00 2001
From: Alexander Block <ablock84@gmail.com>
Date: Thu, 6 Oct 2022 11:40:09 +0200
Subject: [PATCH] Don't retry health check when Unauthorized is returned

This fixes unnecessary delays in self-repair when the underlying provider
(e.g. the AWS provider) performs periodic kubeconfig refreshes. Without
this, the health check is retried 10 times with a 10s interval, meaning
that the controller is unable to act for at least 100 seconds, even though
it could repair itself immediately.
---
 controllers/remote/cluster_cache_tracker.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/controllers/remote/cluster_cache_tracker.go b/controllers/remote/cluster_cache_tracker.go
index 0b0dcab293eb..86318a477986 100644
--- a/controllers/remote/cluster_cache_tracker.go
+++ b/controllers/remote/cluster_cache_tracker.go
@@ -484,6 +484,12 @@ func (t *ClusterCacheTracker) healthCheckCluster(ctx context.Context, in *health
 		// If no error occurs, reset the unhealthy counter.
 		_, err := restClient.Get().AbsPath(in.path).Timeout(in.requestTimeout).DoRaw(ctx)
 		if err != nil {
+			if apierrors.IsUnauthorized(err) {
+				// Unauthorized means that the credentials from the underlying kubeconfig are no longer accepted, which
+				// is usually the result of an automatic kubeconfig refresh. Instead of retrying, we have to throw away
+				// the clusterAccessor and rely on the creation of a new one (with a refreshed kubeconfig).
+				return false, err
+			}
 			unhealthyCount++
 		} else {
 			unhealthyCount = 0