From 22009580c0cb2fa9ba7e2fb51ab3aff847241860 Mon Sep 17 00:00:00 2001 From: Enxebre Date: Fri, 12 Apr 2019 15:08:25 +0200 Subject: [PATCH] Increase wait time for nodes going ready Occasionally some nodes remain unready forever, presumably due to https://bugzilla.redhat.com/show_bug.cgi?id=1698253 which causes https://bugzilla.redhat.com/show_bug.cgi?id=1698624 Orthogonally, some tests are timing out while the node eventually goes ready, hence this PR increases the polling timeout. See all failures: https://openshift-gce-devel.appspot.com/builds/origin-ci-test/pr-logs/pull/openshift_machine-api-operator/261/pull-ci-openshift-machine-api-operator-master-e2e-aws-operator/ e.g.: https://openshift-gce-devel.appspot.com/build/origin-ci-test/pr-logs/pull/openshift_machine-api-operator/261/pull-ci-openshift-machine-api-operator-master-e2e-aws-operator/781/ ip-10-0-133-147.ec2.internal causes "recover from deleted worker machines" to fail: E0412 08:06:16.949021 4971 framework.go:448] Node "ip-10-0-133-147.ec2.internal" is not ready E0412 08:06:16.968104 4971 framework.go:448] Node "ip-10-0-133-147.ec2.internal" is not ready while in the next test it eventually goes ready: I0412 08:06:28.961206 4971 utils.go:233] Node "ip-10-0-133-147.ec2.internal". Ready: true. Unschedulable: false We have only recently started timing out because the time for a node to go ready has increased slightly, though it is still a reasonable amount of time. It is difficult to say what the reason for this is yet; it might be related to crio changes, to skew between the bootimage and the machine-os-content image and pivoting, CI cloud rate limits, or similar factors. 
--- pkg/e2e/framework/framework.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/e2e/framework/framework.go b/pkg/e2e/framework/framework.go index 26ecf23ed..1c8667ed3 100644 --- a/pkg/e2e/framework/framework.go +++ b/pkg/e2e/framework/framework.go @@ -436,7 +436,7 @@ func IsNodeReady(node *corev1.Node) bool { } func WaitUntilAllNodesAreReady(client runtimeclient.Client) error { - return wait.PollImmediate(1*time.Second, time.Minute, func() (bool, error) { + return wait.PollImmediate(1*time.Second, PoolNodesReadyTimeout, func() (bool, error) { nodeList := corev1.NodeList{} if err := client.List(context.TODO(), &runtimeclient.ListOptions{}, &nodeList); err != nil { glog.Errorf("error querying api for nodeList object: %v, retrying...", err)