Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase agent's apiserver ready timeout #4454

Merged
merged 1 commit into from
Nov 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pkg/agent/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ func run(ctx context.Context, cfg cmds.Agent, proxy proxy.Proxy) error {
return err
}

util.WaitForAPIServerReady(coreClient, 30*time.Second)
if err := util.WaitForAPIServerReady(ctx, coreClient, util.DefaultAPIServerReadyTimeout); err != nil {
return errors.Wrap(err, "failed to wait for apiserver ready")
}

if err := configureNode(ctx, &nodeConfig.AgentConfig, coreClient.CoreV1().Nodes()); err != nil {
return err
Expand Down
4 changes: 3 additions & 1 deletion pkg/agent/tunnel/tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ func Setup(ctx context.Context, config *config.Node, proxy proxy.Proxy) error {
// and go from the cluster. We go into a faster but noisier connect loop if the watch fails
// following a successful connection.
go func() {
util.WaitForAPIServerReady(client, 30*time.Second)
if err := util.WaitForAPIServerReady(ctx, client, util.DefaultAPIServerReadyTimeout); err != nil {
logrus.Warnf("Tunnel endpoint watch failed to wait for apiserver ready: %v", err)
}
connect:
for {
time.Sleep(5 * time.Second)
Expand Down
2 changes: 1 addition & 1 deletion pkg/daemons/control/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ func waitForAPIServerInBackground(ctx context.Context, runtime *config.ControlRu
select {
case <-ctx.Done():
return
case err := <-promise(func() error { return util.WaitForAPIServerReady(k8sClient, 30*time.Second) }):
case err := <-promise(func() error { return util.WaitForAPIServerReady(ctx, k8sClient, 30*time.Second) }):
if err != nil {
logrus.Infof("Waiting for API server to become available")
continue
Expand Down
15 changes: 11 additions & 4 deletions pkg/util/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ import (
clientset "k8s.io/client-go/kubernetes"
)

// DefaultAPIServerReadyTimeout is the default duration to wait for the apiserver to become ready.
// It is primarily used to block startup of agent supervisor controllers until the apiserver is
// ready to serve requests, in the same way that the apiReady channel is used in the server
// packages, so it can be fairly long. It must be at least long enough for downstream projects
// like RKE2 to start the apiserver in the background.
const DefaultAPIServerReadyTimeout = 15 * time.Minute

func GetAddresses(endpoint *v1.Endpoints) []string {
serverAddresses := []string{}
if endpoint == nil {
Expand All @@ -37,14 +43,15 @@ func GetAddresses(endpoint *v1.Endpoints) []string {
}

// WaitForAPIServerReady waits for the API Server's /readyz endpoint to report "ok" with timeout.
// This is cribbed from the Kubernetes controller-manager app, but checks the readyz endpoint instead of the deprecated healthz endpoint.
func WaitForAPIServerReady(client clientset.Interface, timeout time.Duration) error {
// This is modified from WaitForAPIServer from the Kubernetes controller-manager app, but checks the
// readyz endpoint instead of the deprecated healthz endpoint, and supports context.
func WaitForAPIServerReady(ctx context.Context, client clientset.Interface, timeout time.Duration) error {
var lastErr error
restClient := client.Discovery().RESTClient()

err := wait.PollImmediate(time.Second, timeout, func() (bool, error) {
err := wait.PollImmediateWithContext(ctx, time.Second, timeout, func(ctx context.Context) (bool, error) {
healthStatus := 0
result := restClient.Get().AbsPath("/readyz").Do(context.TODO()).StatusCode(&healthStatus)
result := restClient.Get().AbsPath("/readyz").Do(ctx).StatusCode(&healthStatus)
if rerr := result.Error(); rerr != nil {
lastErr = errors.Wrap(rerr, "failed to get apiserver /readyz status")
return false, nil
Expand Down