From 22009580c0cb2fa9ba7e2fb51ab3aff847241860 Mon Sep 17 00:00:00 2001
From: Enxebre <alberto.garcial@hotmail.com>
Date: Fri, 12 Apr 2019 15:08:25 +0200
Subject: [PATCH] Increase wait time for nodes going ready

Occasionally some nodes remain unready forever, presumably due to
https://bugzilla.redhat.com/show_bug.cgi?id=1698253 which causes https://bugzilla.redhat.com/show_bug.cgi?id=1698624

Orthogonally, some tests are timing out even though the node eventually goes ready, hence this PR increases the polling timeout.
See all failures:
https://openshift-gce-devel.appspot.com/builds/origin-ci-test/pr-logs/pull/openshift_machine-api-operator/261/pull-ci-openshift-machine-api-operator-master-e2e-aws-operator/
e.g.:
https://openshift-gce-devel.appspot.com/build/origin-ci-test/pr-logs/pull/openshift_machine-api-operator/261/pull-ci-openshift-machine-api-operator-master-e2e-aws-operator/781/

Node ip-10-0-133-147.ec2.internal causes the "recover from deleted worker machines" test to fail:

E0412 08:06:16.949021    4971 framework.go:448] Node "ip-10-0-133-147.ec2.internal" is not ready
E0412 08:06:16.968104    4971 framework.go:448] Node "ip-10-0-133-147.ec2.internal" is not ready
while in the next test it eventually goes ready:

I0412 08:06:28.961206    4971 utils.go:233] Node "ip-10-0-133-147.ec2.internal". Ready: true. Unschedulable: false
We have only recently started timing out because the time for a node to go ready has increased slightly, though it is still a reasonable amount of time. It is difficult to say yet what the reason is; it might be related to crio changes, to skew between bootimage and machine-os-content image and pivoting, CI cloud rate limits, or similar factors.
---
 pkg/e2e/framework/framework.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/e2e/framework/framework.go b/pkg/e2e/framework/framework.go
index 26ecf23ed..1c8667ed3 100644
--- a/pkg/e2e/framework/framework.go
+++ b/pkg/e2e/framework/framework.go
@@ -436,7 +436,7 @@ func IsNodeReady(node *corev1.Node) bool {
 }
 
 func WaitUntilAllNodesAreReady(client runtimeclient.Client) error {
-	return wait.PollImmediate(1*time.Second, time.Minute, func() (bool, error) {
+	return wait.PollImmediate(1*time.Second, PoolNodesReadyTimeout, func() (bool, error) {
 		nodeList := corev1.NodeList{}
 		if err := client.List(context.TODO(), &runtimeclient.ListOptions{}, &nodeList); err != nil {
 			glog.Errorf("error querying api for nodeList object: %v, retrying...", err)