diff --git a/pkg/agent/run.go b/pkg/agent/run.go
index bbfae253c81e..05179dbd373a 100644
--- a/pkg/agent/run.go
+++ b/pkg/agent/run.go
@@ -137,7 +137,9 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
 		return err
 	}
 
-	util.WaitForAPIServerReady(coreClient, 30*time.Second)
+	if err := util.WaitForAPIServerReady(ctx, coreClient, util.DefaultAPIServerReadyTimeout); err != nil {
+		return errors.Wrap(err, "failed to wait for apiserver ready")
+	}
 
 	if err := configureNode(ctx, &nodeConfig.AgentConfig, coreClient.CoreV1().Nodes()); err != nil {
 		return err
diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go
index 0503152eaa58..98da2faa398d 100644
--- a/pkg/agent/tunnel/tunnel.go
+++ b/pkg/agent/tunnel/tunnel.go
@@ -78,7 +78,9 @@ func Setup(ctx context.Context, config *config.Node, proxy proxy.Proxy) error {
 	// and go from the cluster. We go into a faster but noisier connect loop if the watch fails
 	// following a successful connection.
 	go func() {
-		util.WaitForAPIServerReady(client, 30*time.Second)
+		if err := util.WaitForAPIServerReady(ctx, client, util.DefaultAPIServerReadyTimeout); err != nil {
+			logrus.Warnf("Tunnel endpoint watch failed to wait for apiserver ready: %v", err)
+		}
 	connect:
 		for {
 			time.Sleep(5 * time.Second)
diff --git a/pkg/daemons/control/server.go b/pkg/daemons/control/server.go
index 0e4489000703..6a5dd4c32d32 100644
--- a/pkg/daemons/control/server.go
+++ b/pkg/daemons/control/server.go
@@ -454,7 +454,7 @@ func waitForAPIServerInBackground(ctx context.Context, runtime *config.ControlRu
 			select {
 			case <-ctx.Done():
 				return
-			case err := <-promise(func() error { return util.WaitForAPIServerReady(k8sClient, 30*time.Second) }):
+			case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, k8sClient, 30*time.Second) }):
 				if err != nil {
 					logrus.Infof("Waiting for API server to become available")
 					continue
diff --git a/pkg/util/api.go b/pkg/util/api.go
index a3044045ed96..7f55c4f2e3c4 100644
--- a/pkg/util/api.go
+++ b/pkg/util/api.go
@@ -16,6 +16,12 @@ import (
 	clientset "k8s.io/client-go/kubernetes"
 )
 
+// This sets a default duration to wait for the apiserver to become ready. This is primarily used to
+// block startup of agent supervisor controllers until the apiserver is ready to serve requests, in the
+// same way that the apiReady channel is used in the server packages, so it can be fairly long. It must
+// be at least long enough for downstream projects like RKE2 to start the apiserver in the background.
+const DefaultAPIServerReadyTimeout = 15 * time.Minute
+
 func GetAddresses(endpoint *v1.Endpoints) []string {
 	serverAddresses := []string{}
 	if endpoint == nil {
@@ -37,15 +43,19 @@ func GetAddresses(endpoint *v1.Endpoints) []string {
 }
 
 // WaitForAPIServerReady waits for the API Server's /readyz endpoint to report "ok" with timeout.
-// This is cribbed from the Kubernetes controller-manager app, but checks the readyz endpoint instead of the deprecated healthz endpoint.
-func WaitForAPIServerReady(client clientset.Interface, timeout time.Duration) error {
+// This is modified from WaitForAPIServer from the Kubernetes controller-manager app, but checks the
+// readyz endpoint instead of the deprecated healthz endpoint, and supports context.
+func WaitForAPIServerReady(ctx context.Context, client clientset.Interface, timeout time.Duration) error {
 	var lastErr error
 	restClient := client.Discovery().RESTClient()
 
 	err := wait.PollImmediate(time.Second, timeout, func() (bool, error) {
 		healthStatus := 0
-		result := restClient.Get().AbsPath("/readyz").Do(context.TODO()).StatusCode(&healthStatus)
+		result := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&healthStatus)
 		if rerr := result.Error(); rerr != nil {
+			if errors.Is(rerr, context.Canceled) {
+				return false, rerr
+			}
 			lastErr = errors.Wrap(rerr, "failed to get apiserver /readyz status")
 			return false, nil
 		}
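
Not part of the diff: a minimal sketch of how a caller outside the agent might drive the new context-aware helper. It assumes the patched util package is importable as github.com/k3s-io/k3s/pkg/util and that a kubeconfig exists at the default k3s path; cancelling the context aborts the readiness poll early, otherwise it gives up after DefaultAPIServerReadyTimeout.

package main

import (
	"context"
	"log"

	"github.com/k3s-io/k3s/pkg/util" // assumed import path for the patched package
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Build a clientset from a kubeconfig; the path is a placeholder for illustration.
	restConfig, err := clientcmd.BuildConfigFromFlags("", "/etc/rancher/k3s/k3s.yaml")
	if err != nil {
		log.Fatalf("failed to load kubeconfig: %v", err)
	}
	client, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		log.Fatalf("failed to build clientset: %v", err)
	}

	// Cancelling ctx stops the /readyz poll immediately; without cancellation the
	// helper returns an error once DefaultAPIServerReadyTimeout (15 minutes) elapses.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	if err := util.WaitForAPIServerReady(ctx, client, util.DefaultAPIServerReadyTimeout); err != nil {
		log.Fatalf("apiserver did not become ready: %v", err)
	}
	log.Println("apiserver is ready")
}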