From a8c11f351858cd159a3f823b15860918c0490cbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Wo=C5=BAniak?= Date: Thu, 1 Feb 2024 21:26:50 +0100 Subject: [PATCH] Fix Kueue startup by waiting for webhooks server using probes (#1676) * Wait for webhooks server using probes * Delete KueueReadyForTesting * revert the setting of healthz * Add a comment about the readyz probe --- .../podtaintstolerations/test/e2e/suite_test.go | 12 ------------ cmd/kueue/main.go | 13 ++++++++++++- hack/e2e-common.sh | 1 + test/e2e/multikueue/suite_test.go | 7 ------- test/e2e/singlecluster/suite_test.go | 1 - test/util/e2e.go | 11 ----------- 6 files changed, 13 insertions(+), 32 deletions(-) diff --git a/cmd/experimental/podtaintstolerations/test/e2e/suite_test.go b/cmd/experimental/podtaintstolerations/test/e2e/suite_test.go index 8c9549aef982..2bb14704aac8 100644 --- a/cmd/experimental/podtaintstolerations/test/e2e/suite_test.go +++ b/cmd/experimental/podtaintstolerations/test/e2e/suite_test.go @@ -29,8 +29,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" - kueuetest "sigs.k8s.io/kueue/pkg/util/testing" - "sigs.k8s.io/kueue/test/util" ) var ( @@ -67,17 +65,7 @@ func CreateClientUsingCluster() client.Client { return client } -func KueueReadyForTesting(client client.Client) { - // To verify that webhooks are ready, let's create a simple resourceflavor - resourceKueue := kueuetest.MakeResourceFlavor("default").Obj() - gomega.Eventually(func() error { - return client.Create(context.Background(), resourceKueue) - }, Timeout, Interval).Should(gomega.Succeed()) - util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, resourceKueue, true) -} - var _ = ginkgo.BeforeSuite(func() { k8sClient = CreateClientUsingCluster() ctx = context.Background() - KueueReadyForTesting(k8sClient) }) diff --git a/cmd/kueue/main.go b/cmd/kueue/main.go index 4ca31bade1ed..ef39b9f3e2d2 100644 --- 
a/cmd/kueue/main.go +++ b/cmd/kueue/main.go @@ -19,6 +19,7 @@ package main import ( "context" "flag" + "net/http" "os" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) @@ -278,7 +279,17 @@ func setupProbeEndpoints(mgr ctrl.Manager) { setupLog.Error(err, "unable to set up health check") os.Exit(1) } - if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { + + // Wait for the webhook server to be listening before advertising the + // Kueue replica as ready. This lets users hold off on sending the first + // requests that require webhooks until the Kueue deployment is available, + // so that early requests are not rejected during Kueue's startup. + // We wrap the call to GetWebhookServer in a closure to delay calling + // the function; otherwise a not fully-initialized webhook server (without + // ready certs) fails the start of the manager. + if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error { + return mgr.GetWebhookServer().StartedChecker()(req) + }); err != nil { setupLog.Error(err, "unable to set up ready check") os.Exit(1) } diff --git a/hack/e2e-common.sh b/hack/e2e-common.sh index b664e4e8ee45..1d0458628920 100644 --- a/hack/e2e-common.sh +++ b/hack/e2e-common.sh @@ -55,5 +55,6 @@ function cluster_kueue_deploy { else kubectl apply --server-side -k test/e2e/config fi + kubectl wait --for=condition=available --timeout=3m deployment/kueue-controller-manager -n kueue-system } diff --git a/test/e2e/multikueue/suite_test.go b/test/e2e/multikueue/suite_test.go index 8b21e7c307f9..b62eccfec6c6 100644 --- a/test/e2e/multikueue/suite_test.go +++ b/test/e2e/multikueue/suite_test.go @@ -62,11 +62,4 @@ var _ = ginkgo.BeforeSuite(func() { k8sWorker2Client = util.CreateClientUsingCluster("kind-" + worker2ClusterName) ctx = context.Background() - - //wait for the managers to start - // failing a this point might indicate a manifestation of - // 
https://kind.sigs.k8s.io/docs/user/known-issues/#pod-errors-due-to-too-many-open-files - util.KueueReadyForTesting(ctx, k8sManagerClient) - util.KueueReadyForTesting(ctx, k8sWorker1Client) - util.KueueReadyForTesting(ctx, k8sWorker2Client) }) diff --git a/test/e2e/singlecluster/suite_test.go b/test/e2e/singlecluster/suite_test.go index 82bf830db59c..e8c9872b5d8a 100644 --- a/test/e2e/singlecluster/suite_test.go +++ b/test/e2e/singlecluster/suite_test.go @@ -54,5 +54,4 @@ var _ = ginkgo.BeforeSuite(func() { visibilityClient = util.CreateVisibilityClient("") impersonatedVisibilityClient = util.CreateVisibilityClient("system:serviceaccount:kueue-system:default") ctx = context.Background() - util.KueueReadyForTesting(ctx, k8sClient) }) diff --git a/test/util/e2e.go b/test/util/e2e.go index 4539b02342f7..5ddad902eb04 100644 --- a/test/util/e2e.go +++ b/test/util/e2e.go @@ -1,7 +1,6 @@ package util import ( - "context" "fmt" "os" @@ -16,7 +15,6 @@ import ( visibility "sigs.k8s.io/kueue/apis/visibility/v1alpha1" kueueclientset "sigs.k8s.io/kueue/client-go/clientset/versioned" visibilityv1alpha1 "sigs.k8s.io/kueue/client-go/clientset/versioned/typed/visibility/v1alpha1" - utiltesting "sigs.k8s.io/kueue/pkg/util/testing" ) func CreateClientUsingCluster(kContext string) client.Client { @@ -64,12 +62,3 @@ func CreateVisibilityClient(user string) visibilityv1alpha1.VisibilityV1alpha1In visibilityClient := kueueClient.VisibilityV1alpha1() return visibilityClient } - -func KueueReadyForTesting(ctx context.Context, client client.Client) { - // To verify that webhooks are ready, let's create a simple resourceflavor - resourceKueue := utiltesting.MakeResourceFlavor("default").Obj() - gomega.Eventually(func() error { - return client.Create(context.Background(), resourceKueue) - }, StartUpTimeout, Interval).Should(gomega.Succeed()) - ExpectResourceFlavorToBeDeleted(ctx, client, resourceKueue, true) -}