diff --git a/tests/Makefile b/tests/Makefile index 4feede75a..0776da47c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,7 +14,7 @@ GINKGO_FLAGS= NGF_VERSION= CI=false TELEMETRY_ENDPOINT= -TELEMETRY_ENDPOINT_INSECURE= +TELEMETRY_ENDPOINT_INSECURE=false ifneq ($(GINKGO_LABEL),) override GINKGO_FLAGS += --label-filter "$(GINKGO_LABEL)" diff --git a/tests/framework/request.go b/tests/framework/request.go index 674a35ed4..f5a03c32a 100644 --- a/tests/framework/request.go +++ b/tests/framework/request.go @@ -3,6 +3,7 @@ package framework import ( "bytes" "context" + "crypto/tls" "fmt" "net" "net/http" @@ -34,7 +35,18 @@ func Get(url, address string, timeout time.Duration) (int, string, error) { return 0, "", err } - resp, err := http.DefaultClient.Do(req) + var resp *http.Response + if strings.HasPrefix(url, "https") { + customTransport := http.DefaultTransport.(*http.Transport).Clone() + // similar to how in our examples with https requests we run our curl command + // we turn off verification of the certificate, we do the same here + customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec // for https test traffic + client := &http.Client{Transport: customTransport} + resp, err = client.Do(req) + } else { + resp, err = http.DefaultClient.Do(req) + } + if err != nil { return 0, "", err } diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index e62cb5db8..fdc996c5d 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -124,12 +124,12 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro } // Delete deletes Kubernetes resources defined as Go objects. -func (rm *ResourceManager) Delete(resources []client.Object) error { +func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.DeleteOption) error { for _, resource := range resources { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.DeleteTimeout) defer cancel() - if err := rm.K8sClient.Delete(ctx, resource); err != nil && !apierrors.IsNotFound(err) { + if err := rm.K8sClient.Delete(ctx, resource, opts...); err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("error deleting resource: %w", err) } } @@ -159,7 +159,7 @@ func (rm *ResourceManager) readAndHandleObjects( files []string, ) error { for _, file := range files { - data, err := rm.getFileContents(file) + data, err := rm.GetFileContents(file) if err != nil { return err } @@ -187,9 +187,9 @@ func (rm *ResourceManager) readAndHandleObjects( return nil } -// getFileContents takes a string that can either be a local file +// GetFileContents takes a string that can either be a local file // path or an https:// URL to YAML manifests and provides the contents. -func (rm *ResourceManager) getFileContents(file string) (*bytes.Buffer, error) { +func (rm *ResourceManager) GetFileContents(file string) (*bytes.Buffer, error) { if strings.HasPrefix(file, "http://") { return nil, fmt.Errorf("data can't be retrieved from %s: http is not supported, use https", file) } else if strings.HasPrefix(file, "https://") { @@ -314,7 +314,7 @@ func (rm *ResourceManager) waitForRoutesToBeReady(ctx context.Context, namespace var numParents, readyCount int for _, route := range routeList.Items { - numParents += len(route.Status.Parents) + numParents += len(route.Spec.ParentRefs) for _, parent := range route.Status.Parents { for _, cond := range parent.Conditions { if cond.Type == string(v1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue { diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index d49e988a5..2aee2e5a2 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -17,15 +17,23 @@ type TimeoutConfig struct { // RequestTimeout represents the maximum time for making an HTTP Request with the roundtripper. RequestTimeout time.Duration + + // ContainerRestartTimeout represents the maximum time for a Kubernetes Container to restart. + ContainerRestartTimeout time.Duration + + // GetLeaderLeaseTimeout represents the maximum time for NGF to retrieve the leader lease. + GetLeaderLeaseTimeout time.Duration } // DefaultTimeoutConfig populates a TimeoutConfig with the default values. func DefaultTimeoutConfig() TimeoutConfig { return TimeoutConfig{ - CreateTimeout: 60 * time.Second, - DeleteTimeout: 10 * time.Second, - GetTimeout: 10 * time.Second, - ManifestFetchTimeout: 10 * time.Second, - RequestTimeout: 10 * time.Second, + CreateTimeout: 60 * time.Second, + DeleteTimeout: 10 * time.Second, + GetTimeout: 10 * time.Second, + ManifestFetchTimeout: 10 * time.Second, + RequestTimeout: 10 * time.Second, + ContainerRestartTimeout: 10 * time.Second, + GetLeaderLeaseTimeout: 60 * time.Second, } } diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go new file mode 100644 index 000000000..bf6c61295 --- /dev/null +++ b/tests/suite/graceful_recovery_test.go @@ -0,0 +1,362 @@ +package suite + +import ( + "context" + "errors" + "fmt" + "net/http" + "strings" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/batch/v1" + coordination "k8s.io/api/coordination/v1" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +const ( + nginxContainerName = "nginx" + ngfContainerName = "nginx-gateway" +) + +// Since checkContainerLogsForErrors may experience interference from previous tests (as explained in the function +// documentation), this test is recommended to be run separate from other nfr tests. +var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recovery"), func() { + files := []string{ + "graceful-recovery/cafe.yaml", + "graceful-recovery/cafe-secret.yaml", + "graceful-recovery/gateway.yaml", + "graceful-recovery/cafe-routes.yaml", + } + + ns := &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "graceful-recovery", + }, + } + + teaURL := "https://cafe.example.com/tea" + coffeeURL := "http://cafe.example.com/coffee" + + var ngfPodName string + + BeforeAll(func() { + // this test is unique in that it will check the entire log of both ngf and nginx containers + // for any errors, so in order to avoid errors generated in previous tests we will uninstall + // NGF installed at the suite level, then re-deploy our own + teardown(releaseName) + + setup(getDefaultSetupCfg()) + + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + + ngfPodName = podNames[0] + }) + + BeforeEach(func() { + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + Eventually( + func() error { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + }) + + AfterAll(func() { + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + }) + + It("recovers when NGF container is restarted", func() { + runRecoveryTest(teaURL, coffeeURL, ngfPodName, ngfContainerName, files, ns) + }) + + It("recovers when nginx container is restarted", func() { + // FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed. + Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108") + runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, ns) + }) +}) + +func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files []string, ns *core.Namespace) { + var ( + err error + leaseName string + ) + + if containerName != nginxContainerName { + // Since we have already deployed resources and ran resourceManager.WaitForAppsToBeReady(ns.Name) earlier, + // we know that the applications are ready at this point. This could only be the case if NGF has written + // statuses, which could only be the case if NGF has the leader lease. Since there is only one instance + // of NGF in this test, we can be certain that this is the correct leaseholder name. + leaseName, err = getLeaderElectionLeaseHolderName() + Expect(err).ToNot(HaveOccurred()) + } + + restartContainer(ngfPodName, containerName) + + if containerName != nginxContainerName { + Eventually( + func() error { + return checkLeaderLeaseChange(leaseName) + }). + WithTimeout(timeoutConfig.GetLeaderLeaseTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + } + + Eventually( + func() error { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + + Eventually( + func() error { + return checkForFailingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + Eventually( + func() error { + return checkForWorkingTraffic(teaURL, coffeeURL) + }). + WithTimeout(timeoutConfig.RequestTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + + checkContainerLogsForErrors(ngfPodName) +} + +func restartContainer(ngfPodName, containerName string) { + var jobScript string + if containerName == "nginx" { + jobScript = "PID=$(pgrep -f \"nginx: master process\") && kill -9 $PID" + } else { + jobScript = "PID=$(pgrep -f \"/usr/bin/gateway\") && kill -9 $PID" + } + + restartCount, err := getContainerRestartCount(ngfPodName, containerName) + Expect(err).ToNot(HaveOccurred()) + + job, err := runNodeDebuggerJob(ngfPodName, jobScript) + Expect(err).ToNot(HaveOccurred()) + + Eventually( + func() error { + return checkContainerRestart(ngfPodName, containerName, restartCount) + }). + WithTimeout(timeoutConfig.ContainerRestartTimeout). + WithPolling(500 * time.Millisecond). + Should(Succeed()) + + // default propagation policy is metav1.DeletePropagationOrphan which does not delete the underlying + // pod created through the job after the job is deleted. Setting it to metav1.DeletePropagationBackground + // deletes the underlying pod after the job is deleted. + Expect(resourceManager.Delete( + []client.Object{job}, + client.PropagationPolicy(metav1.DeletePropagationBackground), + )).To(Succeed()) +} + +func checkContainerRestart(ngfPodName, containerName string, currentRestartCount int) error { + restartCount, err := getContainerRestartCount(ngfPodName, containerName) + if err != nil { + return err + } + + if restartCount != currentRestartCount+1 { + return fmt.Errorf("expected current restart count: %d to match incremented restart count: %d", restartCount, currentRestartCount+1) + } + + return nil +} + +func checkForWorkingTraffic(teaURL, coffeeURL string) error { + if err := expectRequestToSucceed(teaURL, address, "URI: /tea"); err != nil { + return err + } + if err := expectRequestToSucceed(coffeeURL, address, "URI: /coffee"); err != nil { + return err + } + return nil +} + +func checkForFailingTraffic(teaURL, coffeeURL string) error { + if err := expectRequestToFail(teaURL, address, "URI: /tea"); err != nil { + return err + } + if err := expectRequestToFail(coffeeURL, address, "URI: /coffee"); err != nil { + return err + } + return nil +} + +func expectRequestToSucceed(appURL, address string, responseBodyMessage string) error { + status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) + if status != http.StatusOK { + return errors.New("http status was not 200") + } + + if !strings.Contains(body, responseBodyMessage) { + return errors.New("expected response body to contain correct body message") + } + + return err +} + +func expectRequestToFail(appURL, address string, responseBodyMessage string) error { + status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout) + if status != 0 { + return errors.New("expected http status to be 0") + } + + if body != "" { + return fmt.Errorf("expected response body to be empty, instead received: %s", body) + } + + if err == nil { + return errors.New("expected request to error") + } + + return nil +} + +// checkContainerLogsForErrors checks both nginx and ngf container's logs for any possible errors. +// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests, +// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests +// may cause an interference with this test and cause this test to fail. +func checkContainerLogsForErrors(ngfPodName string) { + logs, err := resourceManager.GetPodLogs( + ngfNamespace, + ngfPodName, + &core.PodLogOptions{Container: nginxContainerName}, + ) + Expect(err).ToNot(HaveOccurred()) + + for _, line := range strings.Split(logs, "\n") { + Expect(line).ToNot(ContainSubstring("[crit]"), line) + Expect(line).ToNot(ContainSubstring("[alert]"), line) + Expect(line).ToNot(ContainSubstring("[emerg]"), line) + if strings.Contains(line, "[error]") { + Expect(line).To(ContainSubstring("connect() failed (111: Connection refused)"), line) + } + } + + logs, err = resourceManager.GetPodLogs( + ngfNamespace, + ngfPodName, + &core.PodLogOptions{Container: ngfContainerName}, + ) + Expect(err).ToNot(HaveOccurred()) + Expect(logs).ToNot(ContainSubstring("\"level\":\"error\""), logs) +} + +func checkLeaderLeaseChange(originalLeaseName string) error { + leaseName, err := getLeaderElectionLeaseHolderName() + if err != nil { + return err + } + + if originalLeaseName == leaseName { + return fmt.Errorf("expected originalLeaseName: %s, to not match current leaseName: %s", originalLeaseName, leaseName) + } + + return nil +} + +func getLeaderElectionLeaseHolderName() (string, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var lease coordination.Lease + key := types.NamespacedName{Name: "ngf-test-nginx-gateway-fabric-leader-election", Namespace: ngfNamespace} + + if err := k8sClient.Get(ctx, key, &lease); err != nil { + return "", errors.New("could not retrieve leader election lease") + } + + if *lease.Spec.HolderIdentity == "" { + return "", errors.New("leader election lease holder identity is empty") + } + + return *lease.Spec.HolderIdentity, nil +} + +func getContainerRestartCount(ngfPodName, containerName string) (int, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + return 0, fmt.Errorf("error retriving NGF Pod: %w", err) + } + + var restartCount int + for _, containerStatus := range ngfPod.Status.ContainerStatuses { + if containerStatus.Name == containerName { + restartCount = int(containerStatus.RestartCount) + } + } + + return restartCount, nil +} + +func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) + defer cancel() + + var ngfPod core.Pod + if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ngfNamespace, Name: ngfPodName}, &ngfPod); err != nil { + return nil, fmt.Errorf("error retriving NGF Pod: %w", err) + } + + b, err := resourceManager.GetFileContents("graceful-recovery/node-debugger-job.yaml") + if err != nil { + return nil, fmt.Errorf("error processing node debugger job file: %w", err) + } + + job := &v1.Job{} + if err = yaml.Unmarshal(b.Bytes(), job); err != nil { + return nil, fmt.Errorf("error with yaml unmarshal: %w", err) + } + + job.Spec.Template.Spec.NodeSelector["kubernetes.io/hostname"] = ngfPod.Spec.NodeName + if len(job.Spec.Template.Spec.Containers) != 1 { + return nil, fmt.Errorf( + "expected node debugger job to contain one container, actual number: %d", + len(job.Spec.Template.Spec.Containers), + ) + } + job.Spec.Template.Spec.Containers[0].Args = []string{jobScript} + job.Namespace = ngfNamespace + + if err = resourceManager.Apply([]client.Object{job}); err != nil { + return nil, fmt.Errorf("error in applying job: %w", err) + } + + return job, nil +} diff --git a/tests/suite/manifests/graceful-recovery/cafe-routes.yaml b/tests/suite/manifests/graceful-recovery/cafe-routes.yaml new file mode 100644 index 000000000..5d63141a9 --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe-routes.yaml @@ -0,0 +1,37 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: coffee +spec: + parentRefs: + - name: gateway + sectionName: http + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /coffee + backendRefs: + - name: coffee + port: 80 +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: tea +spec: + parentRefs: + - name: gateway + sectionName: https + hostnames: + - "cafe.example.com" + rules: + - matches: + - path: + type: PathPrefix + value: /tea + backendRefs: + - name: tea + port: 80 diff --git a/tests/suite/manifests/graceful-recovery/cafe-secret.yaml b/tests/suite/manifests/graceful-recovery/cafe-secret.yaml new file mode 100644 index 000000000..4510460bb --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe-secret.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Secret +metadata: + name: cafe-secret +type: kubernetes.io/tls +data: + tls.crt: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUNzakNDQVpvQ0NRQzdCdVdXdWRtRkNEQU5CZ2txaGtpRzl3MEJBUXNGQURBYk1Sa3dGd1lEVlFRRERCQmoKWVdabExtVjRZVzF3YkdVdVkyOXRNQjRYRFRJeU1EY3hOREl4TlRJek9Wb1hEVEl6TURjeE5ESXhOVEl6T1ZvdwpHekVaTUJjR0ExVUVBd3dRWTJGbVpTNWxlR0Z0Y0d4bExtTnZiVENDQVNJd0RRWUpLb1pJaHZjTkFRRUJCUUFECmdnRVBBRENDQVFvQ2dnRUJBTHFZMnRHNFc5aStFYzJhdnV4Q2prb2tnUUx1ek10U1Rnc1RNaEhuK3ZRUmxIam8KVzFLRnMvQVdlS25UUStyTWVKVWNseis4M3QwRGtyRThwUisxR2NKSE50WlNMb0NEYUlRN0Nhck5nY1daS0o4Qgo1WDNnVS9YeVJHZjI2c1REd2xzU3NkSEQ1U2U3K2Vab3NPcTdHTVF3K25HR2NVZ0VtL1Q1UEMvY05PWE0zZWxGClRPL051MStoMzROVG9BbDNQdTF2QlpMcDNQVERtQ0thaEROV0NWbUJQUWpNNFI4VERsbFhhMHQ5Z1o1MTRSRzUKWHlZWTNtdzZpUzIrR1dYVXllMjFuWVV4UEhZbDV4RHY0c0FXaGRXbElweHlZQlNCRURjczN6QlI2bFF1OWkxZAp0R1k4dGJ3blVmcUVUR3NZdWxzc05qcU95V1VEcFdJelhibHhJZVVDQXdFQUFUQU5CZ2txaGtpRzl3MEJBUXNGCkFBT0NBUUVBcjkrZWJ0U1dzSnhLTGtLZlRkek1ISFhOd2Y5ZXFVbHNtTXZmMGdBdWVKTUpUR215dG1iWjlpbXQKL2RnWlpYVE9hTElHUG9oZ3BpS0l5eVVRZVdGQ2F0NHRxWkNPVWRhbUloOGk0Q1h6QVJYVHNvcUNOenNNLzZMRQphM25XbFZyS2lmZHYrWkxyRi8vblc0VVNvOEoxaCtQeDljY0tpRDZZU0RVUERDRGh1RUtFWXcvbHpoUDJVOXNmCnl6cEJKVGQ4enFyM3paTjNGWWlITmgzYlRhQS82di9jU2lyamNTK1EwQXg4RWpzQzYxRjRVMTc4QzdWNWRCKzQKcmtPTy9QNlA0UFlWNTRZZHMvRjE2WkZJTHFBNENCYnExRExuYWRxamxyN3NPbzl2ZzNnWFNMYXBVVkdtZ2todAp6VlZPWG1mU0Z4OS90MDBHUi95bUdPbERJbWlXMGc9PQotLS0tLUVORCBDRVJUSUZJQ0FURS0tLS0tCg== + tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFvR0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K diff --git a/tests/suite/manifests/graceful-recovery/cafe.yaml b/tests/suite/manifests/graceful-recovery/cafe.yaml new file mode 100644 index 000000000..be4ad51a9 --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/cafe.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coffee +spec: + replicas: 1 + selector: + matchLabels: + app: coffee + template: + metadata: + labels: + app: coffee + spec: + containers: + - name: coffee + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 + readinessProbe: + httpGet: + path: / + port: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: coffee +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: coffee +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tea +spec: + replicas: 1 + selector: + matchLabels: + app: tea + template: + metadata: + labels: + app: tea + spec: + containers: + - name: tea + image: nginxdemos/nginx-hello:plain-text + ports: + - containerPort: 8080 + readinessProbe: + httpGet: + path: / + port: 8080 +--- +apiVersion: v1 +kind: Service +metadata: + name: tea +spec: + ports: + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + selector: + app: tea diff --git a/tests/suite/manifests/graceful-recovery/gateway.yaml b/tests/suite/manifests/graceful-recovery/gateway.yaml new file mode 100644 index 000000000..6789002ab --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/gateway.yaml @@ -0,0 +1,20 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: gateway +spec: + gatewayClassName: nginx + listeners: + - name: http + port: 80 + protocol: HTTP + hostname: "*.example.com" + - name: https + port: 443 + protocol: HTTPS + hostname: "*.example.com" + tls: + mode: Terminate + certificateRefs: + - kind: Secret + name: cafe-secret diff --git a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml new file mode 100644 index 000000000..dcc3d5ef1 --- /dev/null +++ b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml @@ -0,0 +1,27 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: node-debugger-job +spec: + template: + spec: + hostPID: true + hostIPC: true + nodeSelector: + kubernetes.io/hostname: "to be replaced by the test" + containers: + - name: node-debugger-container + image: ubuntu:22.04 + command: ["/bin/bash", "-c"] + args: ["to be replaced by the test"] + securityContext: + privileged: true + volumeMounts: + - name: host-fs + mountPath: /mnt/host + volumes: + - name: host-fs + hostPath: + path: / + type: Directory + restartPolicy: Never diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index d7786aaa3..7c1218d15 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -14,6 +14,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" apps "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" coordination "k8s.io/api/coordination/v1" core "k8s.io/api/core/v1" apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" @@ -96,6 +97,7 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { Expect(apiext.AddToScheme(scheme)).To(Succeed()) Expect(coordination.AddToScheme(scheme)).To(Succeed()) Expect(v1.AddToScheme(scheme)).To(Succeed()) + Expect(batchv1.AddToScheme(scheme)).To(Succeed()) options := client.Options{ Scheme: scheme, @@ -245,7 +247,9 @@ var _ = BeforeSuite(func() { // - running telemetry test (NGF will be deployed as part of the test) if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") { + strings.Contains(labelFilter, "telemetry") || + strings.Contains(labelFilter, "graceful-recovery") { + cfg.deploy = false } @@ -278,5 +282,6 @@ func isNFR(labelFilter string) bool { return strings.Contains(labelFilter, "nfr") || strings.Contains(labelFilter, "longevity") || strings.Contains(labelFilter, "performance") || - strings.Contains(labelFilter, "upgrade") + strings.Contains(labelFilter, "upgrade") || + strings.Contains(labelFilter, "graceful-recovery") }