From f19345b5f35ca9e0c1a84183772c78e2472b5904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Sevilla?= Date: Fri, 17 Feb 2023 14:40:40 +0100 Subject: [PATCH] Delete namespaces with timeout (#249) * Delete namespaces with timeout With the old implementation, when a job times out the created namespaces are not garbage collected. Signed-off-by: Raul Sevilla * Fix node-density-cni startup probe Signed-off-by: Raul Sevilla * Enable alerting in CI Signed-off-by: Raul Sevilla * Minor typo Signed-off-by: Raul Sevilla --------- Signed-off-by: Raul Sevilla --- cmd/kube-burner/kube-burner.go | 7 ++++- .../node-density-cni/curl-deployment.yml | 2 +- pkg/burner/create.go | 7 +++-- pkg/burner/job.go | 18 +++++++++---- pkg/burner/namespaces.go | 27 +++++++++++++------ pkg/burner/pre_load.go | 5 +++- pkg/burner/utils.go | 7 ----- test/run-ocp.sh | 4 +-- 8 files changed, 50 insertions(+), 27 deletions(-) diff --git a/cmd/kube-burner/kube-burner.go b/cmd/kube-burner/kube-burner.go index 3756a625c..245d3bca9 100644 --- a/cmd/kube-burner/kube-burner.go +++ b/cmd/kube-burner/kube-burner.go @@ -15,6 +15,7 @@ package main import ( + "context" "fmt" "os" "path/filepath" @@ -159,6 +160,7 @@ func initCmd() *cobra.Command { func destroyCmd() *cobra.Command { var uuid, configFile string + var timeout time.Duration var err error cmd := &cobra.Command{ Use: "destroy", @@ -176,10 +178,13 @@ func destroyCmd() *cobra.Command { if err != nil { log.Fatalf("Error creating clientSet: %s", err) } - burner.CleanupNamespaces(listOptions) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + burner.CleanupNamespaces(ctx, listOptions) }, } cmd.Flags().StringVar(&uuid, "uuid", "", "UUID") + cmd.Flags().DurationVarP(&timeout, "timeout", "", 4*time.Hour, "Deletion timeout") cmd.MarkFlagRequired("uuid") return cmd } diff --git a/cmd/kube-burner/ocp-config/node-density-cni/curl-deployment.yml b/cmd/kube-burner/ocp-config/node-density-cni/curl-deployment.yml index a7e871125..de96b7196 100644 --- a/cmd/kube-burner/ocp-config/node-density-cni/curl-deployment.yml +++ b/cmd/kube-burner/ocp-config/node-density-cni/curl-deployment.yml @@ -40,7 +40,7 @@ spec: command: - "/bin/sh" - "-c" - - "curl ${WEBSERVER_HOSTNAME}:${WEBSERVER_PORT}" + - "curl --fail -sS ${WEBSERVER_HOSTNAME}:${WEBSERVER_PORT} -o /dev/null" periodSeconds: 1 timeoutSeconds: 1 failureThreshold: 600 diff --git a/pkg/burner/create.go b/pkg/burner/create.go index d29b485ec..b1d4079bd 100644 --- a/pkg/burner/create.go +++ b/pkg/burner/create.go @@ -278,9 +278,12 @@ func (ex *Executor) RunCreateJobWithChurn() { log.Errorf("Error patching namespace %s. Error: %v", nsName, err) } } - listOptions := metav1.ListOptions{LabelSelector: "churndelete=delete"} + // 1 hour timeout to delete namespaces + ctx, cancel := context.WithTimeout(context.Background(), time.Hour) + defer cancel() // Delete namespaces based on the label we added - CleanupNamespaces(listOptions) + log.Info("Garbage collecting created namespaces") + CleanupNamespaces(ctx, metav1.ListOptions{LabelSelector: "churndelete=delete"}) log.Info("Re-creating deleted objects") // Re-create objects that were deleted ex.RunCreateJob(randStart, numToChurn+randStart-1) diff --git a/pkg/burner/job.go b/pkg/burner/job.go index 8a6dac945..6c8b65242 100644 --- a/pkg/burner/job.go +++ b/pkg/burner/job.go @@ -15,6 +15,7 @@ package burner import ( + "context" "fmt" "sync" "time" @@ -110,7 +111,11 @@ func Run(configSpec config.Spec, uuid string, p *prometheus.Prometheus, alertM * measurements.SetJobConfig(&job.Config) switch job.Config.JobType { case config.CreationJob: - job.Cleanup() + if job.Config.Cleanup { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + CleanupNamespaces(ctx, job.selector.ListOptions) + } measurements.Start(&measurementsWg) measurementsWg.Wait() if job.Config.Churn { @@ -191,10 +196,6 @@ func Run(configSpec config.Spec, uuid string, p *prometheus.Prometheus, alertM * } } log.Infof("Finished execution with UUID: %s", uuid) - if configSpec.GlobalConfig.GC { - log.Info("Garbage collecting created namespaces") - CleanupNamespaces(v1.ListOptions{LabelSelector: fmt.Sprintf("kube-burner-uuid=%v", uuid)}) - } res <- innerRC }() select { @@ -203,6 +204,13 @@ func Run(configSpec config.Spec, uuid string, p *prometheus.Prometheus, alertM * log.Errorf("%v timeout reached", timeout) rc = rcTimeout } + if configSpec.GlobalConfig.GC { + // Use timeout/4 to garbage collect namespaces + ctx, cancel := context.WithTimeout(context.Background(), timeout/4) + defer cancel() + log.Info("Garbage collecting created namespaces") + CleanupNamespaces(ctx, v1.ListOptions{LabelSelector: fmt.Sprintf("kube-burner-uuid=%v", uuid)}) + } return rc, nil } diff --git a/pkg/burner/namespaces.go b/pkg/burner/namespaces.go index c04782bd7..10a3f4d78 100644 --- a/pkg/burner/namespaces.go +++ b/pkg/burner/namespaces.go @@ -54,30 +54,40 @@ func createNamespace(clientset *kubernetes.Clientset, namespaceName string, nsLa } // CleanupNamespaces deletes namespaces with the given selector -func CleanupNamespaces(l metav1.ListOptions) { - ns, _ := ClientSet.CoreV1().Namespaces().List(context.TODO(), l) +func CleanupNamespaces(ctx context.Context, l metav1.ListOptions) { + ns, _ := ClientSet.CoreV1().Namespaces().List(ctx, l) if len(ns.Items) > 0 { log.Infof("Deleting namespaces with label %s", l.LabelSelector) for _, ns := range ns.Items { - err := ClientSet.CoreV1().Namespaces().Delete(context.TODO(), ns.Name, metav1.DeleteOptions{}) + err := ClientSet.CoreV1().Namespaces().Delete(ctx, ns.Name, metav1.DeleteOptions{}) if errors.IsNotFound(err) { log.Warnf("Namespace %s not found", ns.Name) continue } + if ctx.Err() == context.DeadlineExceeded { + log.Fatalf("Timeout cleaning up namespaces: %v", err) + } if err != nil { - log.Errorf("Error cleaning up namespaces: %s", err) + log.Errorf("Error cleaning up namespaces: %v", err) } } } if len(ns.Items) > 0 { - waitForDeleteNamespaces(l) + if err := waitForDeleteNamespaces(ctx, l); err != nil { + if ctx.Err() == context.DeadlineExceeded { + log.Fatalf("Timeout cleaning up namespaces: %v", err) + } + if err != nil { + log.Errorf("Error cleaning up namespaces: %v", err) + } + } } } -func waitForDeleteNamespaces(l metav1.ListOptions) { +func waitForDeleteNamespaces(ctx context.Context, l metav1.ListOptions) error { log.Info("Waiting for namespaces to be definitely deleted") - wait.PollImmediateInfinite(time.Second, func() (bool, error) { - ns, err := ClientSet.CoreV1().Namespaces().List(context.TODO(), l) + err := wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) { + ns, err := ClientSet.CoreV1().Namespaces().List(ctx, l) if err != nil { return false, err } @@ -87,4 +97,5 @@ func waitForDeleteNamespaces(l metav1.ListOptions) { log.Debugf("Waiting for %d namespaces labeled with %s to be deleted", len(ns.Items), l.LabelSelector) return false, nil }) + return err } diff --git a/pkg/burner/pre_load.go b/pkg/burner/pre_load.go index ca125b8b8..1bcee9468 100644 --- a/pkg/burner/pre_load.go +++ b/pkg/burner/pre_load.go @@ -53,7 +53,10 @@ func preLoadImages(job Executor) error { log.Infof("Pre-load: Sleeping for %v", job.Config.PreLoadPeriod) time.Sleep(job.Config.PreLoadPeriod) log.Infof("Pre-load: Deleting namespace %s", preLoadNs) - CleanupNamespaces(v1.ListOptions{LabelSelector: "kube-burner-preload=true"}) + // 5 minutes should be more than enough to cleanup this namespace + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + CleanupNamespaces(ctx, v1.ListOptions{LabelSelector: "kube-burner-preload=true"}) return nil } diff --git a/pkg/burner/utils.go b/pkg/burner/utils.go index 406b47df0..28f97cb73 100644 --- a/pkg/burner/utils.go +++ b/pkg/burner/utils.go @@ -56,13 +56,6 @@ func yamlToUnstructured(y []byte, uns *unstructured.Unstructured) (runtime.Objec return o, gvk } -// Cleanup deletes old namespaces from a given job -func (ex *Executor) Cleanup() { - if ex.Config.Cleanup { - CleanupNamespaces(ex.selector.ListOptions) - } -} - // Verify verifies the number of created objects func (ex *Executor) Verify() bool { var objList *unstructured.UnstructuredList diff --git a/test/run-ocp.sh b/test/run-ocp.sh index 90ad67acf..5cb330226 100755 --- a/test/run-ocp.sh +++ b/test/run-ocp.sh @@ -13,7 +13,7 @@ die() { UUID=$(uuidgen) ES_SERVER="https://search-perfscale-dev-chmf5l4sh66lvxbnadi4bznl3a.us-west-2.es.amazonaws.com/" ES_INDEX="kube-burner-ocp" -COMMON_FLAGS="--es-server=${ES_SERVER} --es-index=${ES_INDEX} --alerting=false --uuid=${UUID} --qps=5 --burst=5" +COMMON_FLAGS="--es-server=${ES_SERVER} --es-index=${ES_INDEX} --alerting=true --uuid=${UUID} --qps=5 --burst=5" echo "Running node-density wrapper" kube-burner ocp node-density --pods-per-node=75 --pod-ready-threshold=10s --container-image=gcr.io/google_containers/pause:3.0 ${COMMON_FLAGS} @@ -24,7 +24,7 @@ kube-burner ocp cluster-density --iterations=3 --churn-duration=2m ${COMMON_FLAG echo "Running cluster-density wrapper w/o network-policies" kube-burner ocp cluster-density --iterations=2 --churn=false --uuid=${UUID} --network-policies=false # Disable gc and avoid metric indexing -echo "Running node-density-cni wrapper" +echo "Running node-density-cni wrapper with gc=false" kube-burner ocp node-density-cni --pods-per-node=75 --gc=false --uuid=${UUID} --alerting=false oc delete ns -l kube-burner-uuid=${UUID} trap - ERR