From 15bab00d0f31c96eed0fe29f34ed7373fa0b9646 Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Mon, 11 Mar 2024 12:58:17 -0600 Subject: [PATCH] Automate longevity test (#1657) Problem: NFR tests are a burden to run manually, taking a lot of time and effort. Solution: Automate the longevity test to make it easier and faster for a developer to run this test. This test will be run separately from the other NFR tests, due to the fact that it is long-lived. It should not be run in the pipeline. There is still a manual step of collecting dashboard results. Also separated out functional and nfr tests in the Makefile and README to better separate the two types of tests. These changes force NFR tests to be run in a GKE environment. --- .github/workflows/nfr.yml | 4 +- .gitignore | 3 + .yamllint.yaml | 2 +- tests/Makefile | 78 ++++++--- tests/README.md | 99 ++++++++++-- tests/framework/results.go | 9 ++ tests/longevity/longevity.md | 151 ------------------ .../longevity}/1.0.0/1.0.0.md | 0 .../longevity}/1.0.0/cpu.png | Bin .../longevity}/1.0.0/memory.png | Bin .../longevity}/1.0.0/reload-time.png | Bin .../longevity}/1.0.0/reloads.png | Bin .../longevity}/1.0.0/stub-status.png | Bin .../longevity}/1.1.0/1.1.0.md | 0 .../longevity}/1.1.0/cpu.png | Bin .../longevity}/1.1.0/memory.png | Bin .../longevity}/1.1.0/reload-time.png | Bin .../longevity}/1.1.0/reloads.png | Bin .../longevity}/1.1.0/stub-status.png | Bin tests/scripts/create-gke-cluster.sh | 4 +- tests/scripts/remote-scripts/install-deps.sh | 2 +- tests/scripts/remote-scripts/run-nfr-tests.sh | 19 +++ tests/scripts/run-tests-gcp-vm.sh | 38 ++++- tests/scripts/sync-files-to-vm.sh | 9 ++ tests/suite/dataplane_perf_test.go | 2 +- tests/suite/longevity_test.go | 97 +++++++++++ .../manifests/longevity}/cafe-routes.yaml | 0 .../manifests/longevity}/cafe-secret.yaml | 0 .../manifests/longevity}/cafe.yaml | 0 .../manifests/longevity}/cronjob.yaml | 10 +- .../manifests/longevity}/gateway.yaml | 0 
.../manifests/longevity}/prom.yaml | 2 +- tests/suite/sample_test.go | 2 +- tests/suite/scripts/longevity-wrk.sh | 9 ++ tests/suite/system_suite_test.go | 74 +++++++-- tests/suite/upgrade_test.go | 19 +-- 36 files changed, 390 insertions(+), 243 deletions(-) delete mode 100644 tests/longevity/longevity.md rename tests/{longevity/results => results/longevity}/1.0.0/1.0.0.md (100%) rename tests/{longevity/results => results/longevity}/1.0.0/cpu.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/memory.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/reload-time.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/reloads.png (100%) rename tests/{longevity/results => results/longevity}/1.0.0/stub-status.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/1.1.0.md (100%) rename tests/{longevity/results => results/longevity}/1.1.0/cpu.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/memory.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/reload-time.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/reloads.png (100%) rename tests/{longevity/results => results/longevity}/1.1.0/stub-status.png (100%) create mode 100644 tests/scripts/remote-scripts/run-nfr-tests.sh create mode 100755 tests/scripts/sync-files-to-vm.sh create mode 100644 tests/suite/longevity_test.go rename tests/{longevity/manifests => suite/manifests/longevity}/cafe-routes.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cafe-secret.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cafe.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/cronjob.yaml (86%) rename tests/{longevity/manifests => suite/manifests/longevity}/gateway.yaml (100%) rename tests/{longevity/manifests => suite/manifests/longevity}/prom.yaml (79%) create mode 100755 tests/suite/scripts/longevity-wrk.sh diff --git 
a/.github/workflows/nfr.yml b/.github/workflows/nfr.yml index 9826b212c..8ce37fc73 100644 --- a/.github/workflows/nfr.yml +++ b/.github/workflows/nfr.yml @@ -144,9 +144,9 @@ jobs: working-directory: ./tests run: | if ${{ inputs.test_label != 'all' }}; then - sed -i '/^GINKGO_LABEL=/s/=.*/="${{ inputs.test_label }}"/' "scripts/vars.env" && make run-tests-on-vm; + sed -i '/^GINKGO_LABEL=/s/=.*/="${{ inputs.test_label }}"/' "scripts/vars.env" && make nfr-test; else - make run-tests-on-vm; + make nfr-test; fi - name: Cleanup diff --git a/.gitignore b/.gitignore index 81ad399f7..a87ca2ab3 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,6 @@ internal/mode/static/nginx/modules/coverage # Credential files **/gha-creds-*.json + +# SSH config files +*.ssh diff --git a/.yamllint.yaml b/.yamllint.yaml index 20470b80d..478262b7d 100644 --- a/.yamllint.yaml +++ b/.yamllint.yaml @@ -41,7 +41,7 @@ rules: .github/ deploy/manifests/nginx-gateway.yaml deploy/manifests/crds - tests/longevity/manifests/cronjob.yaml + tests/suite/manifests/longevity/cronjob.yaml .goreleaser.yml new-line-at-end-of-file: enable new-lines: enable diff --git a/tests/Makefile b/tests/Makefile index 6b32e47d0..463fb6f01 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -32,6 +32,10 @@ help: Makefile ## Display this help create-kind-cluster: ## Create a kind cluster cd .. && make create-kind-cluster +.PHONY: delete-kind-cluster +delete-kind-cluster: ## Delete kind cluster + kind delete cluster + .PHONY: build-images build-images: ## Build NGF and NGINX images cd .. && make PREFIX=$(PREFIX) TAG=$(TAG) build-images @@ -48,46 +52,70 @@ load-images: ## Load NGF and NGINX images on configured kind cluster load-images-with-plus: ## Load NGF and NGINX Plus images on configured kind cluster cd .. 
&& make PREFIX=$(PREFIX) TAG=$(TAG) load-images-with-plus -test: ## Run the system tests against your default k8s cluster - go test -v ./suite $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ - --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ - --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ - --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ - --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) +.PHONY: setup-gcp-and-run-tests +setup-gcp-and-run-tests: create-gke-router create-and-setup-vm run-tests-on-vm ## Create and setup a GKE router and GCP VM for tests and run the functional tests -.PHONY: delete-kind-cluster -delete-kind-cluster: ## Delete kind cluster - kind delete cluster +.PHONY: setup-gcp-and-run-nfr-tests +setup-gcp-and-run-nfr-tests: create-gke-router create-and-setup-vm nfr-test ## Create and setup a GKE router and GCP VM for tests and run the NFR tests -.PHONY: run-tests-on-vm -run-tests-on-vm: ## Run the tests on a GCP VM - bash scripts/run-tests-gcp-vm.sh +.PHONY: create-gke-cluster +create-gke-cluster: ## Create a GKE cluster + bash scripts/create-gke-cluster.sh $(CI) .PHONY: create-and-setup-vm create-and-setup-vm: ## Create and setup a GCP VM for tests bash scripts/create-and-setup-gcp-vm.sh -.PHONY: cleanup-vm -cleanup-vm: ## Delete the test GCP VM and delete the firewall rule - bash scripts/cleanup-vm.sh - .PHONY: create-gke-router create-gke-router: ## Create a GKE router to allow egress traffic from private nodes (allows for external image pulls) bash scripts/create-gke-router.sh -.PHONY: cleanup-router -cleanup-router: ## Delete the GKE router - bash scripts/cleanup-router.sh +.PHONY: sync-files-to-vm +sync-files-to-vm: ## Syncs your local NGF files with the NGF repo on the VM + bash scripts/sync-files-to-vm.sh -.PHONY: setup-gcp-and-run-tests -setup-gcp-and-run-tests: create-gke-router 
create-and-setup-vm run-tests-on-vm ## Create and setup a GKE router and GCP VM for tests and run the tests +.PHONY: run-tests-on-vm +run-tests-on-vm: ## Run the functional tests on a GCP VM + bash scripts/run-tests-gcp-vm.sh + +.PHONY: nfr-test +nfr-test: ## Run the NFR tests on a GCP VM + NFR=true bash scripts/run-tests-gcp-vm.sh + +.PHONY: start-longevity-test +start-longevity-test: ## Start the longevity test to run for 4 days in GKE + START_LONGEVITY=true $(MAKE) nfr-test + +.PHONY: stop-longevity-test +stop-longevity-test: ## Stops the longevity test and collects results + STOP_LONGEVITY=true $(MAKE) nfr-test + +.PHONY: .vm-nfr-test +.vm-nfr-test: ## Runs the NFR tests on the GCP VM (called by `nfr-test`) + go test -v ./suite -ginkgo.label-filter "nfr" $(GINKGO_FLAGS) -ginkgo.v -args --gateway-api-version=$(GW_API_VERSION) \ + --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ + --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) + +.PHONY: test +test: ## Runs the functional tests on your default k8s cluster + go test -v ./suite -ginkgo.label-filter "functional" $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ + --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ + --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) .PHONY: cleanup-gcp cleanup-gcp: cleanup-router cleanup-vm delete-gke-cluster ## Cleanup all GCP resources -.PHONY: create-gke-cluster -create-gke-cluster: ## Create a GKE cluster - bash scripts/create-gke-cluster.sh $(CI) +.PHONY: cleanup-router 
+cleanup-router: ## Delete the GKE router + bash scripts/cleanup-router.sh + +.PHONY: cleanup-vm +cleanup-vm: ## Delete the test GCP VM and delete the firewall rule + bash scripts/cleanup-vm.sh .PHONY: delete-gke-cluster delete-gke-cluster: ## Delete the GKE cluster @@ -95,4 +123,4 @@ delete-gke-cluster: ## Delete the GKE cluster .PHONY: add-local-ip-to-cluster add-local-ip-to-cluster: ## Add local IP to the GKE cluster master-authorized-networks - bash scripts/add-local-ip-to-cluster.sh + bash scripts/add-local-ip-auth-networks.sh diff --git a/tests/README.md b/tests/README.md index 07a8ea141..50d0f5945 100644 --- a/tests/README.md +++ b/tests/README.md @@ -4,19 +4,22 @@ The tests in this directory are meant to be run on a live Kubernetes environment are similar to the existing [conformance tests](../conformance/README.md), but will verify things such as: - NGF-specific functionality -- Non-Functional requirements testing (such as performance, scale, etc.) +- Non-Functional requirements (NFR) testing (such as performance, scale, etc.) When running locally, the tests create a port-forward from your NGF Pod to localhost using a port chosen by the test framework. Traffic is sent over this port. If running on a GCP VM targeting a GKE cluster, the tests will create an internal LoadBalancer service which will receive the test traffic. +**Important**: NFR tests can only be run on a GKE cluster. + Directory structure is as follows: - `framework`: contains utility functions for running the tests -- `suite`: contains the test files - `results`: contains the results files +- `scripts`: contains scripts used to set up the environment and run the tests +- `suite`: contains the test files -**Note**: Existing NFR tests will be migrated into this testing `suite` and results stored in the `results` directory. +> Note: Existing NFR tests will be migrated into this testing `suite` and results stored in the `results` directory.
## Prerequisites @@ -24,13 +27,13 @@ Directory structure is as follows: - Docker. - Golang. -If running the tests on a VM (`make create-vm-and-run-tests` or `make run-tests-on-vm`): +If running NFR tests, or running functional tests in GKE: - The [gcloud CLI](https://cloud.google.com/sdk/docs/install) - A GKE cluster (if `master-authorized-networks` is enabled, please set `ADD_VM_IP_AUTH_NETWORKS=true` in your vars.env file) - Access to GCP Service Account with Kubernetes admin permissions -**Note**: all commands in steps below are executed from the `tests` directory +> Note: all commands in steps below are executed from the `tests` directory ```shell make @@ -52,9 +55,14 @@ delete-kind-cluster Delete kind cluster help Display this help load-images-with-plus Load NGF and NGINX Plus images on configured kind cluster load-images Load NGF and NGINX images on configured kind cluster -run-tests-on-vm Run the tests on a GCP VM -setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the tests -test Run the system tests against your default k8s cluster +nfr-test Run the NFR tests on a GCP VM +run-tests-on-vm Run the functional tests on a GCP VM +setup-gcp-and-run-nfr-tests Create and setup a GKE router and GCP VM for tests and run the NFR tests +setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the functional tests +start-longevity-test Start the longevity test to run for 4 days in GKE +stop-longevity-test Stops the longevity test and collects results +sync-files-to-vm Syncs your local NGF files with the NGF repo on the VM +test Runs the functional tests on your default k8s cluster ``` **Note:** The following variables are configurable when running the below `make` commands: @@ -78,6 +86,8 @@ test Run the system tests against your default k8s clu This can be done in a cloud provider of choice, or locally using `kind`. +**Important**: NFR tests can only be run on a GKE cluster. 
+ To create a local `kind` cluster: ```makefile @@ -128,7 +138,7 @@ make build-images-with-plus load-images-with-plus TAG=$(whoami) ## Step 3 - Run the tests -### 3a - Run the tests locally +### 3a - Run the functional tests locally ```makefile make test TAG=$(whoami) @@ -142,9 +152,9 @@ make test TAG=$(whoami) PLUS_ENABLED=true ### 3b - Run the tests on a GKE cluster from a GCP VM -This step only applies if you would like to run the tests on a GKE cluster from a GCP based VM. +This step only applies if you are running the NFR tests, or would like to run the functional tests on a GKE cluster from a GCP based VM. -Before running the below `make` command, copy the `scripts/vars.env-example` file to `scripts/vars.env` and populate the +Before running the below `make` commands, copy the `scripts/vars.env-example` file to `scripts/vars.env` and populate the required env vars. `GKE_SVC_ACCOUNT` needs to be the name of a service account that has Kubernetes admin permissions. In order to run the tests in GCP, you need a few things: @@ -153,30 +163,85 @@ In order to run the tests in GCP, you need a few things: - this assumes that your GKE cluster is using private nodes. If using public nodes, you don't need this. - GCP VM and firewall rule to send ingress traffic to GKE +To just set up the VM with no router (this will not run the tests): + +```makefile +make create-and-setup-vm +``` + +Otherwise, you can set up the VM, router, and run the tests with a single command. See the options in the sections below. + +By default, the tests run using the version of NGF that was `git cloned` during the setup. 
If you want to make +incremental changes and copy your local changes to the VM to test, you can run + +```makefile +make sync-files-to-vm +``` + +#### Functional Tests + To set up the GCP environment with the router and VM and then run the tests, run the following command: ```makefile make setup-gcp-and-run-tests ``` -If you just need a VM and no router (this will not run the tests): +To use an existing VM to run the tests, run the following ```makefile -make create-and-setup-vm +make run-tests-on-vm +``` + +#### NFR tests + +To set up the GCP environment with the router and VM and then run the tests, run the following command: + + +```makefile +make setup-gcp-and-run-nfr-tests ``` To use an existing VM to run the tests, run the following ```makefile -make run-tests-on-vm +make nfr-test +``` + +##### Longevity testing + +This test is run on its own (and also not in a pipeline) due to its long-running nature. It will run for 4 days before +the tester must collect the results and complete the test. + +To start the longevity test, set up your VM (`create-and-setup-vm`) and run + +```makefile +make start-longevity-test ``` + +> Note: If you want to change the time period for which the test runs, update the `wrk` commands in `suite/scripts/longevity-wrk.sh` to the time period you want, and run `make sync-files-to-vm`. + + +> Note: If you want to re-run the longevity test, you need to clear out the `cafe.example.com` entry from the `/etc/hosts` file on your VM. + +You can verify the test is working by checking nginx logs to see traffic flow, and by checking that the cronjob is running and redeploying apps. + +After 4 days (96h), you can complete the longevity tests and collect results. To ensure that the traffic has stopped flowing, you can ssh to the VM using `gcloud compute ssh` and run `ps aux | grep wrk` to verify the `wrk` commands are no longer running.
Then, visit the [GCP Monitoring Dashboards](https://console.cloud.google.com/monitoring/dashboards) page and select the `NGF Longevity Test` dashboard. Take PNG screenshots of each chart for the time period in which your test ran, and save those to be added to the results file. + +Finally, run + +```makefile +make stop-longevity-test +``` + +This will tear down the test and collect results into a file, where you can add the PNGs of the dashboard. + ### Common test amendments -To run all tests with the label "performance", use the GINKGO_LABEL variable: +To run all tests with the label "my-label", use the GINKGO_LABEL variable: ```makefile -make test TAG=$(whoami) GINKGO_LABEL=performance +make test TAG=$(whoami) GINKGO_LABEL=my-label ``` or to pass a specific flag, e.g. run a specific test, use the GINKGO_FLAGS variable: @@ -185,6 +250,8 @@ or to pass a specific flag, e.g. run a specific test, use the GINKGO_FLAGS varia make test TAG=$(whoami) GINKGO_FLAGS='-ginkgo.focus "writes the system info to a results file"' ``` +> Note: if filtering on NFR tests (or functional tests on GKE), set the filter in the appropriate field in your `vars.env` file. + If you are running the tests in GCP, add your required label/ flags to `scripts/var.env`. You can also modify the tests code for a similar outcome. To run a specific test, you can "focus" it by adding the `F` diff --git a/tests/framework/results.go b/tests/framework/results.go index 5ea944563..429dd40e6 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -77,6 +77,15 @@ func WriteResults(resultsFile *os.File, metrics *Metrics) error { return reporter.Report(resultsFile) } +// WriteContent writes basic content to the results file. +func WriteContent(resultsFile *os.File, content string) error { + if _, err := fmt.Fprintln(resultsFile, content); err != nil { + return err + } + + return nil +} + // NewCSVEncoder returns a vegeta CSV encoder. 
func NewCSVEncoder(w io.Writer) vegeta.Encoder { return vegeta.NewCSVEncoder(w) diff --git a/tests/longevity/longevity.md b/tests/longevity/longevity.md deleted file mode 100644 index 1271678cc..000000000 --- a/tests/longevity/longevity.md +++ /dev/null @@ -1,151 +0,0 @@ -# Longevity Test - -This document describes how we test NGF for longevity. - - - -- [Longevity Test](#longevity-test) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Steps](#steps) - - [Start](#start) - - [Check the Test is Running Correctly](#check-the-test-is-running-correctly) - - [End](#end) - - [Analyze](#analyze) - - [Results](#results) - - - -## Goals - -- Ensure that NGF successfully processes both control plane and data plane transactions over a period of time much - greater than in our other tests. -- Catch bugs that could only appear over a period of time (like resource leaks). - -## Test Environment - -- A Kubernetes cluster with 3 nodes on GKE - - Node: e2-medium (2 vCPU, 4GB memory) - - Enabled GKE logging. - - Enabled GKE Cloud monitoring with managed Prometheus service, with enabled: - - system. - - kube state - pods, deployments. -- Tester VMs on Google Cloud: - - Configuration: - - Debian - - Install packages: tmux, wrk - - Location - same zone as the Kubernetes cluster. - - First VM - for HTTP traffic - - Second VM - for sending HTTPs traffic -- NGF - - Deployment with 1 replica - - Exposed via a Service with type LoadBalancer, private IP - - Gateway, two listeners - HTTP and HTTPs - - Two apps: - - Coffee - 3 replicas - - Tea - 3 replicas - - Two HTTPRoutes - - Coffee (HTTP) - - Tea (HTTPS) - -## Steps - -### Start - -Test duration - 4 days. - -1. Create a Kubernetes cluster on GKE. -2. Deploy NGF. -3. Expose NGF via a LoadBalancer Service with `"networking.gke.io/load-balancer-type":"Internal"` annotation to - allocate an internal load balancer. -4. Apply the manifests which will: - 1. Deploy the coffee and tea backends. - 2. 
Configure HTTP and HTTPS listeners on the Gateway. - 3. Expose coffee via HTTP listener and tea via HTTPS listener. - 4. Create two CronJobs to re-rollout backends: - 1. Coffee - every minute for an hour every 6 hours - 2. Tea - every minute for an hour every 6 hours, 3 hours apart from coffee. - 5. Configure Prometheus on GKE to pick up NGF metrics (NB: Ensure that the `app.kubernetes.io/name` label matches - your NGF deployment). - - ```shell - kubectl apply -f files - ``` - -5. In Tester VMs, update `/etc/hosts` to have an entry with the External IP of the NGF Service (`10.128.0.10` in this - case): - - ```text - 10.128.0.10 cafe.example.com - ``` - -6. In Tester VMs, start a tmux session (this is needed so that even if you disconnect from the VM, any launched command - will keep running): - - ```shell - tmux - ``` - -7. In First VM, start wrk for 4 days for coffee via HTTP: - - ```shell - wrk -t2 -c100 -d96h http://cafe.example.com/coffee - ``` - -8. In Second VM, start wrk for 4 days for tea via HTTPS: - - ```shell - wrk -t2 -c100 -d96h https://cafe.example.com/tea - ``` - -Notes: - -- The updated coffee and tea backends in cafe.yaml include extra configuration for zero time upgrades, so that - wrk in Tester VMs doesn't get 502 from NGF. Based on https://learnk8s.io/graceful-shutdown - -### Check the Test is Running Correctly - -Check that you don't see any errors: - -1. Check that GKE exports NGF pod logs to Google Cloud Operations Logging and Prometheus metrics to Google Cloud - Monitoring. -2. Check that traffic is flowing - look at the access logs of NGINX in Google Cloud Operations Logging. -3. Check that CronJob can run. - - ```shell - kubectl create job --from=cronjob/coffee-rollout-mgr coffee-test - kubectl create job --from=cronjob/tea-rollout-mgr tea-test - ``` - -In case of errors, double check if you prepared the environment and launched the test correctly. - -### End - -- Remove CronJobs. 
- -## Analyze - -- Traffic - - Tester VMs (clients) - - As wrk stop, they will print output upon termination. To connect to the tmux session with wrk, - run `tmux attach -t 0` - - Check for errors, latency, RPS -- Logs - - Check the logs for errors in Google Cloud Operations Logging. - - NGF - - NGINX -- Check metrics in Google Cloud Monitoring. - - NGF - - CPU usage - - NGINX - - NGF - - Memory usage - - NGINX - - NGF - - NGINX metrics - - Reloads - -## Results - -- [1.0.0](results/1.0.0/1.0.0.md) -- [1.1.0](results/1.1.0/1.1.0.md) diff --git a/tests/longevity/results/1.0.0/1.0.0.md b/tests/results/longevity/1.0.0/1.0.0.md similarity index 100% rename from tests/longevity/results/1.0.0/1.0.0.md rename to tests/results/longevity/1.0.0/1.0.0.md diff --git a/tests/longevity/results/1.0.0/cpu.png b/tests/results/longevity/1.0.0/cpu.png similarity index 100% rename from tests/longevity/results/1.0.0/cpu.png rename to tests/results/longevity/1.0.0/cpu.png diff --git a/tests/longevity/results/1.0.0/memory.png b/tests/results/longevity/1.0.0/memory.png similarity index 100% rename from tests/longevity/results/1.0.0/memory.png rename to tests/results/longevity/1.0.0/memory.png diff --git a/tests/longevity/results/1.0.0/reload-time.png b/tests/results/longevity/1.0.0/reload-time.png similarity index 100% rename from tests/longevity/results/1.0.0/reload-time.png rename to tests/results/longevity/1.0.0/reload-time.png diff --git a/tests/longevity/results/1.0.0/reloads.png b/tests/results/longevity/1.0.0/reloads.png similarity index 100% rename from tests/longevity/results/1.0.0/reloads.png rename to tests/results/longevity/1.0.0/reloads.png diff --git a/tests/longevity/results/1.0.0/stub-status.png b/tests/results/longevity/1.0.0/stub-status.png similarity index 100% rename from tests/longevity/results/1.0.0/stub-status.png rename to tests/results/longevity/1.0.0/stub-status.png diff --git a/tests/longevity/results/1.1.0/1.1.0.md b/tests/results/longevity/1.1.0/1.1.0.md 
similarity index 100% rename from tests/longevity/results/1.1.0/1.1.0.md rename to tests/results/longevity/1.1.0/1.1.0.md diff --git a/tests/longevity/results/1.1.0/cpu.png b/tests/results/longevity/1.1.0/cpu.png similarity index 100% rename from tests/longevity/results/1.1.0/cpu.png rename to tests/results/longevity/1.1.0/cpu.png diff --git a/tests/longevity/results/1.1.0/memory.png b/tests/results/longevity/1.1.0/memory.png similarity index 100% rename from tests/longevity/results/1.1.0/memory.png rename to tests/results/longevity/1.1.0/memory.png diff --git a/tests/longevity/results/1.1.0/reload-time.png b/tests/results/longevity/1.1.0/reload-time.png similarity index 100% rename from tests/longevity/results/1.1.0/reload-time.png rename to tests/results/longevity/1.1.0/reload-time.png diff --git a/tests/longevity/results/1.1.0/reloads.png b/tests/results/longevity/1.1.0/reloads.png similarity index 100% rename from tests/longevity/results/1.1.0/reloads.png rename to tests/results/longevity/1.1.0/reloads.png diff --git a/tests/longevity/results/1.1.0/stub-status.png b/tests/results/longevity/1.1.0/stub-status.png similarity index 100% rename from tests/longevity/results/1.1.0/stub-status.png rename to tests/results/longevity/1.1.0/stub-status.png diff --git a/tests/scripts/create-gke-cluster.sh b/tests/scripts/create-gke-cluster.sh index 20e7c08bc..9d034e1c6 100644 --- a/tests/scripts/create-gke-cluster.sh +++ b/tests/scripts/create-gke-cluster.sh @@ -14,7 +14,9 @@ gcloud container clusters create ${GKE_CLUSTER_NAME} \ --service-account ${GKE_NODES_SERVICE_ACCOUNT} \ --enable-private-nodes \ --master-ipv4-cidr 172.16.${ip_random_digit}.32/28 \ - --metadata=block-project-ssh-keys=TRUE + --metadata=block-project-ssh-keys=TRUE \ + --monitoring=SYSTEM,POD,DEPLOYMENT \ + --logging=SYSTEM,WORKLOAD # Add current IP to GKE master control node access, if this script is not invoked during a CI run. 
if [ "${IS_CI}" = "false" ]; then diff --git a/tests/scripts/remote-scripts/install-deps.sh b/tests/scripts/remote-scripts/install-deps.sh index 371f75ff6..1196a1f21 100644 --- a/tests/scripts/remote-scripts/install-deps.sh +++ b/tests/scripts/remote-scripts/install-deps.sh @@ -4,7 +4,7 @@ set -e source ~/vars.env -sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq gnuplot && \ +sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq gnuplot rsync wrk && \ curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash && \ export GO_VERSION=$(curl -sSL "https://golang.org/dl/?mode=json" | jq -r '.[0].version') && \ wget https://go.dev/dl/${GO_VERSION}.linux-amd64.tar.gz && \ diff --git a/tests/scripts/remote-scripts/run-nfr-tests.sh b/tests/scripts/remote-scripts/run-nfr-tests.sh new file mode 100644 index 000000000..10b4c1ea0 --- /dev/null +++ b/tests/scripts/remote-scripts/run-nfr-tests.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +source ~/vars.env + +echo "export PATH=$PATH:/usr/local/go/bin" >> $HOME/.profile && . 
$HOME/.profile + +if [ "$START_LONGEVITY" == "true" ]; then + GINKGO_LABEL="longevity-setup" +elif [ "$STOP_LONGEVITY" == "true" ]; then + GINKGO_LABEL="longevity-teardown" +fi + +cd nginx-gateway-fabric/tests && make .vm-nfr-test TAG=${TAG} PREFIX=${PREFIX} NGINX_PREFIX=${NGINX_PREFIX} NGINX_PLUS_PREFIX=${NGINX_PLUS_PREFIX} PLUS_ENABLED=${PLUS_ENABLED} GINKGO_LABEL=${GINKGO_LABEL} GINKGO_FLAGS=${GINKGO_FLAGS} PULL_POLICY=Always GW_SERVICE_TYPE=LoadBalancer GW_SVC_GKE_INTERNAL=true NGF_VERSION=${NGF_VERSION} + +if [ "$START_LONGEVITY" == "true" ]; then + suite/scripts/longevity-wrk.sh +fi diff --git a/tests/scripts/run-tests-gcp-vm.sh b/tests/scripts/run-tests-gcp-vm.sh index 2a407bfa1..4c9a8478e 100644 --- a/tests/scripts/run-tests-gcp-vm.sh +++ b/tests/scripts/run-tests-gcp-vm.sh @@ -4,8 +4,42 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) source scripts/vars.env +SCRIPT=run-tests.sh +if [ "${NFR}" = "true" ]; then + SCRIPT=run-nfr-tests.sh +fi + gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${SCRIPT_DIR}/vars.env username@${RESOURCE_NAME}:~ -gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} username@${RESOURCE_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/run-tests.sh +gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} username@${RESOURCE_NAME} \ + --command="export START_LONGEVITY=${START_LONGEVITY} &&\ + export STOP_LONGEVITY=${STOP_LONGEVITY} &&\ + bash -s" < ${SCRIPT_DIR}/remote-scripts/${SCRIPT} + +if [ "${NFR}" = "true" ]; then + gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse username@${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results . 
+fi + +## If tearing down the longevity test, we need to collect logs from gcloud and add to the results +if [ "${STOP_LONGEVITY}" = "true" ]; then + version=${NGF_VERSION} + if [ "$version" = "" ]; then + version=${TAG} + fi + + results="${SCRIPT_DIR}/../results/longevity/$version/$version.md" + printf "\n## Error Logs\n\n" >> $results + + ## ngf error logs + ngfErrText=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(textPayload)") + ngfErrJSON=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx-gateway AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("error")' --format "value(jsonPayload)") + printf "### nginx-gateway\n$ngfErrText\n$ngfErrJSON\n\n" >> $results + + ## nginx error logs + ngxErr=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND severity=ERROR AND SEARCH("`[warn]`") OR SEARCH("`[error]`") OR SEARCH("`[emerg]`")' --format "value(textPayload)") + printf "### nginx\n$ngxErr\n\n" >> $results -gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse username@${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results . 
+ ## nginx non-200 responses (also filter out 499 since wrk cancels connections) + ngxNon200=$(gcloud logging read --project=${GKE_PROJECT} 'resource.labels.cluster_name='"${RESOURCE_NAME}"' AND resource.type=k8s_container AND resource.labels.container_name=nginx AND labels."k8s-pod/app_kubernetes_io/instance"=ngf-longevity AND "GET" "HTTP/1.1" -"200" -"499" -"client prematurely closed connection"' --format "value(textPayload)") + printf "$ngxNon200\n\n" >> $results +fi diff --git a/tests/scripts/sync-files-to-vm.sh b/tests/scripts/sync-files-to-vm.sh new file mode 100755 index 000000000..c7862c205 --- /dev/null +++ b/tests/scripts/sync-files-to-vm.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +source scripts/vars.env + +NGF_DIR=$(dirname "$PWD") + +gcloud compute config-ssh --ssh-config-file ngf-gcp.ssh > /dev/null + +rsync -ave 'ssh -F ngf-gcp.ssh' ${NGF_DIR} username@${RESOURCE_NAME}.${GKE_CLUSTER_ZONE}.${GKE_PROJECT}:~ diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 09f7a6748..9af85a1b2 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -17,7 +17,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Dataplane performance", Ordered, Label("performance"), func() { +var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), func() { files := []string{ "dp-perf/coffee.yaml", "dp-perf/gateway.yaml", diff --git a/tests/suite/longevity_test.go b/tests/suite/longevity_test.go new file mode 100644 index 000000000..0f1382620 --- /dev/null +++ b/tests/suite/longevity_test.go @@ -0,0 +1,97 @@ +package suite + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +// Longevity test is an NFR test, but does not include the "nfr" label. It needs to run on its own, +// outside of the scope of the other NFR tests. This is because it's a long-term test whose environment +// shouldn't be torn down. +var _ = Describe("Longevity", Label("longevity-setup", "longevity-teardown"), func() { + var ( + files = []string{ + "longevity/cafe.yaml", + "longevity/cafe-secret.yaml", + "longevity/gateway.yaml", + "longevity/cafe-routes.yaml", + "longevity/cronjob.yaml", + } + promFile = []string{ + "longevity/prom.yaml", + } + + ns = &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "longevity", + }, + } + + labelFilter = GinkgoLabelFilter() + ) + + BeforeEach(func() { + if !strings.Contains(labelFilter, "longevity") { + Skip("skipping longevity test unless 'longevity' label is explicitly defined when running") + } + }) + + It("sets up the longevity test", Label("longevity-setup"), func() { + if !strings.Contains(labelFilter, "longevity-setup") { + Skip("'longevity-setup' label not specified; skipping...") + } + + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(promFile, ngfNamespace)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + }) + + It("collects results", Label("longevity-teardown"), func() { + if !strings.Contains(labelFilter, "longevity-teardown") { + Skip("'longevity-teardown' label not specified; skipping...") + } + + resultsDir, err := framework.CreateResultsDir("longevity", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, fmt.Sprintf("%s.md", version)) + resultsFile, err := framework.CreateResultsFile(filename) + 
Expect(err).ToNot(HaveOccurred()) + defer resultsFile.Close() + + Expect(framework.WriteSystemInfoToFile(resultsFile, clusterInfo, *plusEnabled)).To(Succeed()) + + // gather wrk output (coffee.txt and tea.txt are read from the current user's home directory) + homeDir, err := os.UserHomeDir() + Expect(err).ToNot(HaveOccurred()) + + Expect(framework.WriteContent(resultsFile, "\n## Traffic\n")).To(Succeed()) + Expect(writeTrafficResults(resultsFile, homeDir, "coffee.txt", "HTTP")).To(Succeed()) + Expect(writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")).To(Succeed()) + + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + }) +}) +// writeTrafficResults reads homeDir/filename and appends it to resultsFile as a fenced text block titled testname. +func writeTrafficResults(resultsFile *os.File, homeDir, filename, testname string) error { + file := filepath.Join(homeDir, filename) + content, err := os.ReadFile(file) + if err != nil { + return err + } + + formattedContent := fmt.Sprintf("%s:\n\n```text\n%s```\n", testname, string(content)) + return framework.WriteContent(resultsFile, formattedContent) +} diff --git a/tests/longevity/manifests/cafe-routes.yaml b/tests/suite/manifests/longevity/cafe-routes.yaml similarity index 100% rename from tests/longevity/manifests/cafe-routes.yaml rename to tests/suite/manifests/longevity/cafe-routes.yaml diff --git a/tests/longevity/manifests/cafe-secret.yaml b/tests/suite/manifests/longevity/cafe-secret.yaml similarity index 100% rename from tests/longevity/manifests/cafe-secret.yaml rename to tests/suite/manifests/longevity/cafe-secret.yaml diff --git a/tests/longevity/manifests/cafe.yaml b/tests/suite/manifests/longevity/cafe.yaml similarity index 100% rename from tests/longevity/manifests/cafe.yaml rename to tests/suite/manifests/longevity/cafe.yaml diff --git a/tests/longevity/manifests/cronjob.yaml b/tests/suite/manifests/longevity/cronjob.yaml similarity index 86% rename from tests/longevity/manifests/cronjob.yaml rename to tests/suite/manifests/longevity/cronjob.yaml index 234ff903d..1f7511cf3 100644 --- 
a/tests/longevity/manifests/cronjob.yaml +++ b/tests/suite/manifests/longevity/cronjob.yaml @@ -2,13 +2,11 @@ apiVersion: v1 kind: ServiceAccount metadata: name: rollout-mgr - namespace: default --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: name: rollout-mgr - namespace: default rules: - apiGroups: - "apps" @@ -21,7 +19,6 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: rollout-mgr - namespace: default roleRef: apiGroup: rbac.authorization.k8s.io kind: Role @@ -29,13 +26,11 @@ roleRef: subjects: - kind: ServiceAccount name: rollout-mgr - namespace: default --- apiVersion: batch/v1 kind: CronJob metadata: name: coffee-rollout-mgr - namespace: default spec: schedule: "* */6 * * *" # every minute every 6 hours jobTemplate: @@ -58,14 +53,13 @@ spec: -H "Authorization: Bearer $TOKEN" \ -H "Content-type: application/merge-patch+json" \ --data-raw "{\"spec\": {\"template\": {\"metadata\": {\"annotations\": {\"kubectl.kubernetes.io/restartedAt\": \"$RESTARTED_AT\"}}}}}" \ - "https://kubernetes/apis/apps/v1/namespaces/default/deployments/coffee?fieldManager=kubectl-rollout" 2>&1 + "https://kubernetes.default/apis/apps/v1/namespaces/longevity/deployments/coffee?fieldManager=kubectl-rollout" 2>&1 restartPolicy: OnFailure --- apiVersion: batch/v1 kind: CronJob metadata: name: tea-rollout-mgr - namespace: default spec: schedule: "* 3,9,15,21 * * *" # every minute every 6 hours, 3 hours apart from coffee jobTemplate: @@ -88,5 +82,5 @@ spec: -H "Authorization: Bearer $TOKEN" \ -H "Content-type: application/merge-patch+json" \ --data-raw "{\"spec\": {\"template\": {\"metadata\": {\"annotations\": {\"kubectl.kubernetes.io/restartedAt\": \"$RESTARTED_AT\"}}}}}" \ - "https://kubernetes/apis/apps/v1/namespaces/default/deployments/tea?fieldManager=kubectl-rollout" 2>&1 + "https://kubernetes.default/apis/apps/v1/namespaces/longevity/deployments/tea?fieldManager=kubectl-rollout" 2>&1 restartPolicy: OnFailure diff --git 
a/tests/longevity/manifests/gateway.yaml b/tests/suite/manifests/longevity/gateway.yaml similarity index 100% rename from tests/longevity/manifests/gateway.yaml rename to tests/suite/manifests/longevity/gateway.yaml diff --git a/tests/longevity/manifests/prom.yaml b/tests/suite/manifests/longevity/prom.yaml similarity index 79% rename from tests/longevity/manifests/prom.yaml rename to tests/suite/manifests/longevity/prom.yaml index e5d35fae7..24de26577 100644 --- a/tests/longevity/manifests/prom.yaml +++ b/tests/suite/manifests/longevity/prom.yaml @@ -6,7 +6,7 @@ metadata: spec: selector: matchLabels: - app.kubernetes.io/name: nginx-gateway + app.kubernetes.io/name: nginx-gateway-fabric endpoints: - port: metrics interval: 30s diff --git a/tests/suite/sample_test.go b/tests/suite/sample_test.go index 0e6ce59f1..3996c6764 100644 --- a/tests/suite/sample_test.go +++ b/tests/suite/sample_test.go @@ -14,7 +14,7 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/tests/framework" ) -var _ = Describe("Basic test example", func() { +var _ = Describe("Basic test example", Label("functional"), func() { files := []string{ "hello/hello.yaml", "hello/gateway.yaml", diff --git a/tests/suite/scripts/longevity-wrk.sh b/tests/suite/scripts/longevity-wrk.sh new file mode 100755 index 000000000..58312a1e1 --- /dev/null +++ b/tests/suite/scripts/longevity-wrk.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +SVC_IP=$(kubectl -n nginx-gateway get svc ngf-longevity-nginx-gateway-fabric -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + +echo "${SVC_IP} cafe.example.com" | sudo tee -a /etc/hosts + +nohup wrk -t2 -c100 -d96h http://cafe.example.com/coffee &> ~/coffee.txt & + +nohup wrk -t2 -c100 -d96h https://cafe.example.com/tea &> ~/tea.txt & diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 8d2af38b5..f758bd98b 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -68,6 +68,7 @@ var ( address string version string 
clusterInfo framework.ClusterInfo + skipNFRTests bool ) const ( @@ -76,9 +77,11 @@ const ( ) type setupConfig struct { + releaseName string chartPath string gwAPIVersion string deploy bool + nfr bool } func setup(cfg setupConfig, extraInstallArgs ...string) { @@ -110,12 +113,30 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { clusterInfo, err = resourceManager.GetClusterInfo() Expect(err).ToNot(HaveOccurred()) + if cfg.nfr && !clusterInfo.IsGKE { + skipNFRTests = true + Skip("NFR tests can only run in GKE") + } + + if cfg.nfr && *serviceType != "LoadBalancer" { + skipNFRTests = true + Skip("GW_SERVICE_TYPE must be 'LoadBalancer' for NFR tests") + } + + if *versionUnderTest != "" { + version = *versionUnderTest + } else if *imageTag != "" { + version = *imageTag + } else { + version = "edge" + } + if !cfg.deploy { return } installCfg := framework.InstallationConfig{ - ReleaseName: releaseName, + ReleaseName: cfg.releaseName, Namespace: ngfNamespace, ChartPath: cfg.chartPath, ServiceType: *serviceType, @@ -131,14 +152,6 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { installCfg.ImagePullPolicy = *imagePullPolicy } - if *versionUnderTest != "" { - version = *versionUnderTest - } else if *imageTag != "" { - version = *imageTag - } else { - version = "edge" - } - output, err := framework.InstallGatewayAPI(k8sClient, cfg.gwAPIVersion, *k8sVersion) Expect(err).ToNot(HaveOccurred(), string(output)) @@ -163,13 +176,13 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { Expect(err).ToNot(HaveOccurred()) } -func teardown() { +func teardown(relName string) { if portFwdPort != 0 { portForwardStopCh <- struct{}{} } cfg := framework.InstallationConfig{ - ReleaseName: releaseName, + ReleaseName: relName, Namespace: ngfNamespace, } @@ -204,21 +217,50 @@ var _ = BeforeSuite(func() { localChartPath = filepath.Join(basepath, "deploy/helm-chart") cfg := setupConfig{ + releaseName: releaseName, chartPath: localChartPath, gwAPIVersion: 
*gatewayAPIVersion, deploy: true, } - // If we are running the upgrade test only, then skip the initial deployment. - // The upgrade test will deploy its own version of NGF. - suiteConfig, _ := GinkgoConfiguration() - if suiteConfig.LabelFilter == "upgrade" { + labelFilter := GinkgoLabelFilter() + cfg.nfr = isNFR(labelFilter) + + // Skip deployment if: + // - running upgrade test (this test will deploy its own version) + // - running longevity teardown (deployment will already exist) + if strings.Contains(labelFilter, "upgrade") || strings.Contains(labelFilter, "longevity-teardown") { cfg.deploy = false } + // use a different release name for longevity to allow us to filter on a specific label when collecting + // logs from GKE + if strings.Contains(labelFilter, "longevity") { + cfg.releaseName = "ngf-longevity" + } + setup(cfg) }) var _ = AfterSuite(func() { - teardown() + if skipNFRTests { + Skip("") + } + + labelFilter := GinkgoLabelFilter() + if !strings.Contains(labelFilter, "longevity-setup") { + relName := releaseName + if strings.Contains(labelFilter, "longevity-teardown") { + relName = "ngf-longevity" + } + + teardown(relName) + } }) + +func isNFR(labelFilter string) bool { + return strings.Contains(labelFilter, "nfr") || + strings.Contains(labelFilter, "longevity") || + strings.Contains(labelFilter, "performance") || + strings.Contains(labelFilter, "upgrade") +} diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go index 3fa71bcc6..0e5983401 100644 --- a/tests/suite/upgrade_test.go +++ b/tests/suite/upgrade_test.go @@ -26,7 +26,7 @@ import ( // This test installs the latest released version of NGF, then upgrades to the edge version (or dev version). // During the upgrade, traffic is continuously sent to ensure no downtime. // We also check that the leader election lease has been updated, and that Gateway updates are processed. 
-var _ = Describe("Upgrade testing", Label("upgrade"), func() { +var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { var ( files = []string{ "ngf-upgrade/cafe.yaml", @@ -44,23 +44,12 @@ var _ = Describe("Upgrade testing", Label("upgrade"), func() { valuesFile = "manifests/ngf-upgrade/values.yaml" resultsFile *os.File resultsDir string - skipped bool ) BeforeEach(func() { - if !clusterInfo.IsGKE { - skipped = true - Skip("Upgrade tests can only run in GKE") - } - - if *serviceType != "LoadBalancer" { - skipped = true - Skip("GW_SERVICE_TYPE must be 'LoadBalancer' for upgrade tests") - } - // this test is unique in that it needs to install the previous version of NGF, // so we need to uninstall the version installed at the suite level, then install the custom version - teardown() + teardown(releaseName) cfg := setupConfig{ chartPath: "oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric", @@ -84,10 +73,6 @@ var _ = Describe("Upgrade testing", Label("upgrade"), func() { }) AfterEach(func() { - if skipped { - Skip("") - } - Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) resultsFile.Close()