From ee4319c931f55611ed2aa290251f720908e8f11a Mon Sep 17 00:00:00 2001 From: Michael Pleshakov Date: Fri, 17 May 2024 16:24:07 -0400 Subject: [PATCH] Automate scale test (#1926) Problem: Non-functional scale test needs to be run manually. Solution: - Automate scale test. - Use in-cluster Prometheus to collect CPU, memory and NGF metrics. - Use Kubernetes API server to get NGF logs. For development and troubleshooting, it is possible to run scale test locally in Kind cluster. However, it is necessary to bring down the number of HTTPRoutes to 50 or less (roughly). Testing: - Ran this test locally with 64 listeners, 50 routes and 50 upstreams with NGINX OSS. - Ran this test locally with 64 listeners, 10 routes and 10 upstreams with NGINX Plus. - Ran this test on GKE with the default configuration with NGINX OSS. Out of scope: ensuring this test runs successfully via GitHub pipeline. Closes https://github.com/nginxinc/nginx-gateway-fabric/issues/1368 Largely based on work by Ciara in https://github.com/nginxinc/nginx-gateway-fabric/pull/1804 Co-authored-by: Ciara Stacke --- tests/Makefile | 2 +- .../generate_manifests.go | 183 ++-- tests/framework/portforward.go | 46 +- tests/framework/prometheus.go | 293 +++++++ tests/framework/resourcemanager.go | 133 ++- tests/framework/results.go | 63 +- tests/framework/timeout.go | 8 + .../results => results/scale}/1.0.0/1.0.0.md | 0 .../TestScale_HTTPRoutes/CPU-no-delay.png | Bin .../scale}/1.0.0/TestScale_HTTPRoutes/CPU.png | Bin .../TestScale_HTTPRoutes/Memory-no-delay.png | Bin .../1.0.0/TestScale_HTTPRoutes/Memory.png | Bin .../scale}/1.0.0/TestScale_HTTPRoutes/TTR.png | Bin .../TestScale_HTTPRoutes/results-no-delay.csv | 0 .../1.0.0/TestScale_HTTPRoutes/results.csv | 0 .../1.0.0/TestScale_HTTPSListeners/CPU.png | Bin .../1.0.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.0.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners/results.csv | 0 .../scale}/1.0.0/TestScale_Listeners/CPU.png | Bin .../1.0.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.0.0/TestScale_Listeners/TTR.png | Bin .../1.0.0/TestScale_Listeners/results.csv | 0 .../1.0.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../results => results/scale}/1.1.0/1.1.0.md | 0 .../TestScale_HTTPRoutes/CPU-no-delay.png | Bin .../scale}/1.1.0/TestScale_HTTPRoutes/CPU.png | Bin .../TestScale_HTTPRoutes/Memory-no-delay.png | Bin .../1.1.0/TestScale_HTTPRoutes/Memory.png | Bin .../scale}/1.1.0/TestScale_HTTPRoutes/TTR.png | Bin .../TestScale_HTTPRoutes/results-no-delay.csv | 0 .../1.1.0/TestScale_HTTPRoutes/results.csv | 0 .../1.1.0/TestScale_HTTPSListeners/CPU.png | Bin .../1.1.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.1.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners/results.csv | 0 .../scale}/1.1.0/TestScale_Listeners/CPU.png | Bin .../1.1.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.1.0/TestScale_Listeners/TTR.png | Bin .../1.1.0/TestScale_Listeners/results.csv | 0 .../1.1.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../results => results/scale}/1.2.0/1.2.0.md | 0 .../scale}/1.2.0/TestScale_HTTPRoutes/CPU.png | Bin .../1.2.0/TestScale_HTTPRoutes/Memory.png | Bin .../TestScale_HTTPRoutes/TTR-without-peak.png | Bin .../scale}/1.2.0/TestScale_HTTPRoutes/TTR.png | Bin .../1.2.0/TestScale_HTTPRoutes_Plus/CPU.png | Bin .../TestScale_HTTPRoutes_Plus/Memory.png | Bin .../1.2.0/TestScale_HTTPRoutes_Plus/TTR.png | Bin .../1.2.0/TestScale_HTTPSListeners/CPU.png | 
Bin .../1.2.0/TestScale_HTTPSListeners/Memory.png | Bin .../1.2.0/TestScale_HTTPSListeners/TTR.png | Bin .../TestScale_HTTPSListeners_Plus/CPU.png | Bin .../TestScale_HTTPSListeners_Plus/Memory.png | Bin .../TestScale_HTTPSListeners_Plus/TTR.png | Bin .../scale}/1.2.0/TestScale_Listeners/CPU.png | Bin .../1.2.0/TestScale_Listeners/Memory.png | Bin .../scale}/1.2.0/TestScale_Listeners/TTR.png | Bin .../1.2.0/TestScale_Listeners_Plus/CPU.png | Bin .../1.2.0/TestScale_Listeners_Plus/Memory.png | Bin .../1.2.0/TestScale_Listeners_Plus/TTR.png | Bin .../1.2.0/TestScale_UpstreamServers/CPU.png | Bin .../TestScale_UpstreamServers/Memory.png | Bin .../TestScale_UpstreamServers_Plus/CPU.png | Bin .../TestScale_UpstreamServers_Plus/Memory.png | Bin tests/scale/manifests/prom-clusterrole.yaml | 44 - tests/scale/scale.md | 527 ------------ tests/scale/scale_test.go | 259 ------ tests/scripts/cpu-plot.gp | 20 + tests/scripts/create-gke-cluster.sh | 14 +- tests/scripts/memory-plot.gp | 21 + tests/scripts/ttr-plot.gp | 18 + tests/scripts/vars.env-example | 2 + tests/suite/dataplane_perf_test.go | 4 +- tests/suite/graceful_recovery_test.go | 2 +- tests/suite/longevity_test.go | 2 +- .../manifests/scale/matches.yaml} | 0 .../manifests/scale/upstreams.yaml} | 0 tests/suite/sample_test.go | 2 +- tests/suite/scale_test.go | 797 ++++++++++++++++++ tests/suite/system_suite_test.go | 44 +- tests/suite/upgrade_test.go | 11 +- 84 files changed, 1533 insertions(+), 962 deletions(-) rename tests/{scale => framework}/generate_manifests.go (66%) create mode 100644 tests/framework/prometheus.go rename tests/{scale/results => results/scale}/1.0.0/1.0.0.md (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPRoutes/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_HTTPSListeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_Listeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.0.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/1.1.0.md (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => 
results/scale}/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPRoutes/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_HTTPSListeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_Listeners/results.csv (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.1.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/1.2.0.md (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_Listeners_Plus/TTR.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers/CPU.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers/Memory.png (100%) rename tests/{scale/results => results/scale}/1.2.0/TestScale_UpstreamServers_Plus/CPU.png (100%) rename tests/{scale/results => 
results/scale}/1.2.0/TestScale_UpstreamServers_Plus/Memory.png (100%) delete mode 100644 tests/scale/manifests/prom-clusterrole.yaml delete mode 100644 tests/scale/scale.md delete mode 100644 tests/scale/scale_test.go create mode 100644 tests/scripts/cpu-plot.gp create mode 100644 tests/scripts/memory-plot.gp create mode 100644 tests/scripts/ttr-plot.gp rename tests/{scale/manifests/scale-matches.yaml => suite/manifests/scale/matches.yaml} (100%) rename tests/{scale/manifests/scale-upstreams.yaml => suite/manifests/scale/upstreams.yaml} (100%) create mode 100644 tests/suite/scale_test.go diff --git a/tests/Makefile b/tests/Makefile index 2eb9203426..947fe6812e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -107,7 +107,7 @@ test: ## Runs the functional tests on your default k8s cluster --label-filter "functional" $(GINKGO_FLAGS) ./suite -- \ --gateway-api-version=$(GW_API_VERSION) --gateway-api-prev-version=$(GW_API_PREV_VERSION) \ --image-tag=$(TAG) --version-under-test=$(NGF_VERSION) \ - --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) \ + --plus-enabled=$(PLUS_ENABLED) --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) --nginx-plus-image-repo=$(NGINX_PLUS_PREFIX) \ --pull-policy=$(PULL_POLICY) --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) \ --is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL) diff --git a/tests/scale/generate_manifests.go b/tests/framework/generate_manifests.go similarity index 66% rename from tests/scale/generate_manifests.go rename to tests/framework/generate_manifests.go index 88f752141d..46c7b92b32 100644 --- a/tests/scale/generate_manifests.go +++ b/tests/framework/generate_manifests.go @@ -1,17 +1,18 @@ -//go:build scale -// +build scale - -package scale +package framework import ( "bytes" + "errors" "fmt" - "os" - "path/filepath" + "io" "text/template" + + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/yaml" + "sigs.k8s.io/controller-runtime/pkg/client" ) -var gwTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 +const gwTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: gateway @@ -33,7 +34,7 @@ spec: {{- end -}} {{- end -}}` -var hrTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 +const hrTmplTxt = `apiVersion: gateway.networking.k8s.io/v1 kind: HTTPRoute metadata: name: {{ .Name }} @@ -53,7 +54,7 @@ spec: port: 80` // nolint:all -var secretTmplTxt = `apiVersion: v1 +const secretTmplTxt = `apiVersion: v1 kind: Secret metadata: name: {{ . 
}} @@ -63,8 +64,7 @@ data: tls.key: LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2UUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktjd2dnU2pBZ0VBQW9JQkFRQzZtTnJSdUZ2WXZoSE4KbXI3c1FvNUtKSUVDN3N6TFVrNExFeklSNS9yMEVaUjQ2RnRTaGJQd0ZuaXAwMFBxekhpVkhKYy92TjdkQTVLeApQS1VmdFJuQ1J6YldVaTZBZzJpRU93bXF6WUhGbVNpZkFlVjk0RlAxOGtSbjl1ckV3OEpiRXJIUncrVW51L25tCmFMRHF1eGpFTVBweGhuRklCSnYwK1R3djNEVGx6TjNwUlV6dnpidGZvZCtEVTZBSmR6N3Rid1dTNmR6MHc1Z2kKbW9RelZnbFpnVDBJek9FZkV3NVpWMnRMZllHZWRlRVJ1VjhtR041c09va3R2aGxsMU1udHRaMkZNVHgySmVjUQo3K0xBRm9YVnBTS2NjbUFVZ1JBM0xOOHdVZXBVTHZZdFhiUm1QTFc4SjFINmhFeHJHTHBiTERZNmpzbGxBNlZpCk0xMjVjU0hsQWdNQkFBRUNnZ0VBQnpaRE50bmVTdWxGdk9HZlFYaHRFWGFKdWZoSzJBenRVVVpEcUNlRUxvekQKWlV6dHdxbkNRNlJLczUyandWNTN4cU9kUU94bTNMbjNvSHdNa2NZcEliWW82MjJ2dUczYnkwaVEzaFlsVHVMVgpqQmZCcS9UUXFlL2NMdngvSkczQWhFNmJxdFRjZFlXeGFmTmY2eUtpR1dzZk11WVVXTWs4MGVJVUxuRmZaZ1pOCklYNTlSOHlqdE9CVm9Sa3hjYTVoMW1ZTDFsSlJNM3ZqVHNHTHFybmpOTjNBdWZ3ZGRpK1VDbGZVL2l0K1EvZkUKV216aFFoTlRpNVFkRWJLVStOTnYvNnYvb2JvandNb25HVVBCdEFTUE05cmxFemIralQ1WHdWQjgvLzRGY3VoSwoyVzNpcjhtNHVlQ1JHSVlrbGxlLzhuQmZ0eVhiVkNocVRyZFBlaGlPM1FLQmdRRGlrR3JTOTc3cjg3Y1JPOCtQClpoeXltNXo4NVIzTHVVbFNTazJiOTI1QlhvakpZL2RRZDVTdFVsSWE4OUZKZnNWc1JRcEhHaTFCYzBMaTY1YjIKazR0cE5xcVFoUmZ1UVh0UG9GYXRuQzlPRnJVTXJXbDVJN0ZFejZnNkNQMVBXMEg5d2hPemFKZUdpZVpNYjlYTQoybDdSSFZOcC9jTDlYbmhNMnN0Q1lua2Iwd0tCZ1FEUzF4K0crakEyUVNtRVFWNXA1RnRONGcyamsyZEFjMEhNClRIQ2tTazFDRjhkR0Z2UWtsWm5ZbUt0dXFYeXNtekJGcnZKdmt2eUhqbUNYYTducXlpajBEdDZtODViN3BGcVAKQWxtajdtbXI3Z1pUeG1ZMXBhRWFLMXY4SDNINGtRNVl3MWdrTWRybVJHcVAvaTBGaDVpaGtSZS9DOUtGTFVkSQpDcnJjTzhkUVp3S0JnSHA1MzRXVWNCMVZibzFlYStIMUxXWlFRUmxsTWlwRFM2TzBqeWZWSmtFb1BZSEJESnp2ClIrdzZLREJ4eFoyWmJsZ05LblV0YlhHSVFZd3lGelhNcFB5SGxNVHpiZkJhYmJLcDFyR2JVT2RCMXpXM09PRkgKcmppb21TUm1YNmxhaDk0SjRHU0lFZ0drNGw1SHhxZ3JGRDZ2UDd4NGRjUktJWFpLZ0w2dVJSSUpBb0dCQU1CVApaL2p5WStRNTBLdEtEZHUrYU9ORW4zaGxUN3hrNXRKN3NBek5rbWdGMU10RXlQUk9Xd1pQVGFJbWpRbk9qbHdpCldCZ2JGcXg0M2ZlQ1Z4ZXJ6V3ZEM0txaWJVbWpCTkNMTGtYeGh3ZEVteFQwVit2NzZGYzgwaTNNYVdSNnZZR08KditwVVovL0F6UXdJcWZ6dlVmV2ZxdStrMHlhVXhQOGNlcFBIRyt0bEFvR0FmQUtVVWhqeFU0Ym5vVzVwVUhKegpwWWZXZXZ5TW54NWZyT2VsSmRmNzlvNGMvMHhVSjh1eFBFWDFkRmNrZW96dHNpaVFTNkN6MENRY09XVWxtSkRwCnVrdERvVzM3VmNSQU1BVjY3NlgxQVZlM0UwNm5aL2g2Tkd4Z28rT042Q3pwL0lkMkJPUm9IMFAxa2RjY1NLT3kKMUtFZlNnb1B0c1N1eEpBZXdUZmxDMXc9Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K ` -var appTmplTxt = `apiVersion: v1 -apiVersion: apps/v1 +const appTmplTxt = `apiVersion: apps/v1 kind: Deployment metadata: name: {{ . }} @@ -105,25 +105,55 @@ var ( appTmpl = template.Must(template.New("app").Parse(appTmplTxt)) ) -type Listener struct { +type listener struct { Name string HostnamePrefix string SecretName string } -type Route struct { +type route struct { Name string ListenerName string HostnamePrefix string BackendName string } -func getPrereqDirName(manifestDir string) string { - return filepath.Join(manifestDir, "prereqs") +// ScaleObjects contains objects for scale testing. +type ScaleObjects struct { + // BaseObjects contains objects that are common to all scale iterations. + BaseObjects []client.Object + // ScaleIterationGroups contains objects for each scale iteration. 
+ ScaleIterationGroups [][]client.Object } -func generateScaleListenerManifests(numListeners int, manifestDir string, tls bool) error { - listeners := make([]Listener, 0) +func decodeObjects(reader io.Reader) ([]client.Object, error) { + var objects []client.Object + + decoder := yaml.NewYAMLOrJSONDecoder(reader, 4096) + for { + obj := unstructured.Unstructured{} + if err := decoder.Decode(&obj); err != nil { + if errors.Is(err, io.EOF) { + break + } + return nil, fmt.Errorf("error decoding resource: %w", err) + } + + if len(obj.Object) == 0 { + continue + } + + objects = append(objects, &obj) + } + + return objects, nil +} + +// GenerateScaleListenerObjects generates objects for a given number of listeners for the scale test. +func GenerateScaleListenerObjects(numListeners int, tls bool) (ScaleObjects, error) { + var result ScaleObjects + + listeners := make([]listener, 0) backends := make([]string, 0) secrets := make([]string, 0) @@ -138,13 +168,13 @@ func generateScaleListenerManifests(numListeners int, manifestDir string, tls bo secrets = append(secrets, secretName) } - listeners = append(listeners, Listener{ + listeners = append(listeners, listener{ Name: listenerName, HostnamePrefix: hostnamePrefix, SecretName: secretName, }) - route := Route{ + r := route{ Name: fmt.Sprintf("route-%d", i), ListenerName: listenerName, HostnamePrefix: hostnamePrefix, @@ -153,44 +183,57 @@ func generateScaleListenerManifests(numListeners int, manifestDir string, tls bo backends = append(backends, backendName) - if err := generateManifests(manifestDir, i, listeners, []Route{route}); err != nil { - return err + objects, err := generateManifests(listeners, []route{r}) + if err != nil { + return ScaleObjects{}, err } + + result.ScaleIterationGroups = append(result.ScaleIterationGroups, objects) } - if err := generateSecrets(getPrereqDirName(manifestDir), secrets); err != nil { - return err + secretObjects, err := generateSecrets(secrets) + if err != nil { + return ScaleObjects{}, err } - return generateBackendAppManifests(getPrereqDirName(manifestDir), backends) -} + result.BaseObjects = append(result.BaseObjects, secretObjects...) -func generateSecrets(secretsDir string, secrets []string) error { - err := os.Mkdir(secretsDir, 0o750) - if err != nil && !os.IsExist(err) { - return err + backendObjects, err := generateBackendAppObjects(backends) + if err != nil { + return ScaleObjects{}, err } + result.BaseObjects = append(result.BaseObjects, backendObjects...) + + return result, nil +} + +func generateSecrets(secrets []string) ([]client.Object, error) { + objects := make([]client.Object, 0, len(secrets)) + for _, secret := range secrets { var buf bytes.Buffer - if err = secretTmpl.Execute(&buf, secret); err != nil { - return err + if err := secretTmpl.Execute(&buf, secret); err != nil { + return nil, err } - path := filepath.Join(secretsDir, fmt.Sprintf("%s.yaml", secret)) - - fmt.Println("Writing", path) - if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { - return err + objs, err := decodeObjects(&buf) + if err != nil { + return nil, err } + + objects = append(objects, objs...) } - return nil + return objects, nil } -func generateScaleHTTPRouteManifests(numRoutes int, manifestDir string) error { - l := Listener{ +// GenerateScaleHTTPRouteObjects generates objects for a given number of routes for the scale test. 
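+// The Gateway (a single listener with a wildcard hostname) is generated only as part of
+// the first iteration group, while the shared backend app objects are returned in BaseObjects.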
+func GenerateScaleHTTPRouteObjects(numRoutes int) (ScaleObjects, error) { + var result ScaleObjects + + l := listener{ Name: "listener", HostnamePrefix: "*", } @@ -198,35 +241,43 @@ func generateScaleHTTPRouteManifests(numRoutes int, manifestDir string) error { backendName := "backend" for i := 0; i < numRoutes; i++ { - - route := Route{ + r := route{ Name: fmt.Sprintf("route-%d", i), HostnamePrefix: fmt.Sprintf("%d", i), ListenerName: "listener", BackendName: backendName, } - var listeners []Listener + var listeners []listener if i == 0 { // only generate a Gateway on the first iteration - listeners = []Listener{l} + listeners = []listener{l} } - if err := generateManifests(manifestDir, i, listeners, []Route{route}); err != nil { - return err + objects, err := generateManifests(listeners, []route{r}) + if err != nil { + return ScaleObjects{}, err } + result.ScaleIterationGroups = append(result.ScaleIterationGroups, objects) + } + + backendObjects, err := generateBackendAppObjects([]string{backendName}) + if err != nil { + return ScaleObjects{}, err } - return generateBackendAppManifests(getPrereqDirName(manifestDir), []string{backendName}) + result.BaseObjects = backendObjects + + return result, nil } -func generateManifests(outDir string, version int, listeners []Listener, routes []Route) error { +func generateManifests(listeners []listener, routes []route) ([]client.Object, error) { var buf bytes.Buffer if len(listeners) > 0 { if err := gwTmpl.Execute(&buf, listeners); err != nil { - return err + return nil, err } } @@ -236,42 +287,30 @@ func generateManifests(outDir string, version int, listeners []Listener, routes } if err := hrTmpl.Execute(&buf, r); err != nil { - return err + return nil, err } } - err := os.Mkdir(outDir, 0o750) - if err != nil && !os.IsExist(err) { - return err - } - - filename := fmt.Sprintf("manifest-%d.yaml", version) - path := filepath.Join(outDir, filename) - - fmt.Println("Writing", path) - return os.WriteFile(path, buf.Bytes(), 0o600) + return decodeObjects(&buf) } -func generateBackendAppManifests(outDir string, backends []string) error { - err := os.Mkdir(outDir, 0o750) - if err != nil && !os.IsExist(err) { - return err - } +func generateBackendAppObjects(backends []string) ([]client.Object, error) { + objects := make([]client.Object, 0, 2*len(backends)) for _, backend := range backends { var buf bytes.Buffer - if err = appTmpl.Execute(&buf, backend); err != nil { - return err + if err := appTmpl.Execute(&buf, backend); err != nil { + return nil, err } - path := filepath.Join(outDir, fmt.Sprintf("%s.yaml", backend)) - - fmt.Println("Writing", path) - if err := os.WriteFile(path, buf.Bytes(), 0o600); err != nil { - return err + objs, err := decodeObjects(&buf) + if err != nil { + return nil, err } + + objects = append(objects, objs...) } - return nil + return objects, nil } diff --git a/tests/framework/portforward.go b/tests/framework/portforward.go index 1efc16be3c..e8234fea7c 100644 --- a/tests/framework/portforward.go +++ b/tests/framework/portforward.go @@ -6,22 +6,25 @@ import ( "net/http" "net/url" "path" + "time" + + "log/slog" "k8s.io/client-go/rest" "k8s.io/client-go/tools/portforward" "k8s.io/client-go/transport/spdy" ) -// PortForward starts a port-forward to the specified Pod and returns the local port being forwarded. -func PortForward(config *rest.Config, namespace, podName string, stopCh chan struct{}) (int, error) { +// PortForward starts a port-forward to the specified Pod. 
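+// It forwards the given port mappings (for example "9090:9090") and keeps retrying the
+// forward in the background until stopCh is closed.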
+func PortForward(config *rest.Config, namespace, podName string, ports []string, stopCh <-chan struct{}) error { roundTripper, upgrader, err := spdy.RoundTripperFor(config) if err != nil { - return 0, fmt.Errorf("error creating roundtripper: %w", err) + return fmt.Errorf("error creating roundtripper: %w", err) } serverURL, err := url.Parse(config.Host) if err != nil { - return 0, fmt.Errorf("error parsing rest config host: %w", err) + return fmt.Errorf("error parsing rest config host: %w", err) } serverURL.Path = path.Join( @@ -33,25 +36,34 @@ func PortForward(config *rest.Config, namespace, podName string, stopCh chan str dialer := spdy.NewDialer(upgrader, &http.Client{Transport: roundTripper}, http.MethodPost, serverURL) - readyCh := make(chan struct{}, 1) out, errOut := new(bytes.Buffer), new(bytes.Buffer) - forwarder, err := portforward.New(dialer, []string{":80"}, stopCh, readyCh, out, errOut) - if err != nil { - return 0, fmt.Errorf("error creating port forwarder: %w", err) + forward := func() error { + readyCh := make(chan struct{}, 1) + + forwarder, err := portforward.New(dialer, ports, stopCh, readyCh, out, errOut) + if err != nil { + return fmt.Errorf("error creating port forwarder: %w", err) + } + + return forwarder.ForwardPorts() } go func() { - if err := forwarder.ForwardPorts(); err != nil { - panic(err) + for { + if err := forward(); err != nil { + slog.Error("error forwarding ports", "error", err) + slog.Info("retrying port forward in 100ms...") + } + + select { + case <-stopCh: + return + case <-time.After(100 * time.Millisecond): + // retrying + } } }() - <-readyCh - ports, err := forwarder.GetPorts() - if err != nil { - return 0, fmt.Errorf("error getting ports being forwarded: %w", err) - } - - return int(ports[0].Local), nil + return nil } diff --git a/tests/framework/prometheus.go b/tests/framework/prometheus.go new file mode 100644 index 0000000000..3afe01521f --- /dev/null +++ b/tests/framework/prometheus.go @@ -0,0 +1,293 @@ +package framework + +import ( + "context" + "encoding/csv" + "errors" + "fmt" + "log/slog" + "os" + "os/exec" + "time" + + "github.com/prometheus/client_golang/api" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + prometheusNamespace = "prom" + prometheusReleaseName = "prom" +) + +var defaultPrometheusQueryTimeout = 2 * time.Second + +// PrometheusConfig is the configuration for installing Prometheus +type PrometheusConfig struct { + // ScrapeInterval is the interval at which Prometheus scrapes metrics. + ScrapeInterval time.Duration + // QueryTimeout is the timeout for Prometheus queries. + // Default is 2s. + QueryTimeout time.Duration +} + +// InstallPrometheus installs Prometheus in the cluster. +// It waits for Prometheus pods to be ready before returning. 
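+// It adds the prometheus-community Helm repository and installs the chart by shelling out
+// to the helm CLI, so helm must be available on the test runner's PATH.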
+func InstallPrometheus( + rm ResourceManager, + cfg PrometheusConfig, +) (PrometheusInstance, error) { + output, err := exec.Command( + "helm", + "repo", + "add", + "prometheus-community", + "https://prometheus-community.github.io/helm-charts", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to add Prometheus helm repo: %w; output: %s", err, string(output)) + } + + output, err = exec.Command( + "helm", + "repo", + "update", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to update helm repos: %w; output: %s", err, string(output)) + } + + scrapeInterval := fmt.Sprintf("%ds", int(cfg.ScrapeInterval.Seconds())) + + output, err = exec.Command( + "helm", + "install", + prometheusReleaseName, + "prometheus-community/prometheus", + "--create-namespace", + "--namespace", prometheusNamespace, + "--set", fmt.Sprintf("server.global.scrape_interval=%s", scrapeInterval), + "--wait", + ).CombinedOutput() + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to install Prometheus: %w; output: %s", err, string(output)) + } + + pods, err := rm.GetPods(prometheusNamespace, client.MatchingLabels{ + "app.kubernetes.io/name": "prometheus", + }) + if err != nil { + return PrometheusInstance{}, fmt.Errorf("failed to get Prometheus pods: %w", err) + } + + if len(pods) != 1 { + return PrometheusInstance{}, fmt.Errorf("expected one Prometheus pod, found %d", len(pods)) + } + + pod := pods[0] + + if pod.Status.PodIP == "" { + return PrometheusInstance{}, errors.New("Prometheus pod has no IP") + } + + var queryTimeout time.Duration + if cfg.QueryTimeout == 0 { + queryTimeout = defaultPrometheusQueryTimeout + } else { + queryTimeout = cfg.QueryTimeout + } + + return PrometheusInstance{ + podIP: pod.Status.PodIP, + podName: pod.Name, + podNamespace: pod.Namespace, + queryTimeout: queryTimeout, + }, nil +} + +// UninstallPrometheus uninstalls Prometheus from the cluster. +func UninstallPrometheus(rm ResourceManager) error { + output, err := exec.Command( + "helm", + "uninstall", + prometheusReleaseName, + "-n", prometheusNamespace, + ).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to uninstall Prometheus: %w; output: %s", err, string(output)) + } + + if err := rm.DeleteNamespace(prometheusNamespace); err != nil { + return fmt.Errorf("failed to delete Prometheus namespace: %w", err) + } + + return nil +} + +const ( + // PrometheusPortForwardPort is the local port that will forward to the Prometheus API. + PrometheusPortForwardPort = 9090 + prometheusAPIPort = 9090 +) + +// PrometheusInstance represents a Prometheus instance in the cluster. +type PrometheusInstance struct { + podIP string + podName string + podNamespace string + portForward bool + queryTimeout time.Duration + + apiClient v1.API +} + +// PortForward starts port forwarding to the Prometheus instance. 
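+// It may be called at most once per instance; a second call panics.
+// Hypothetical usage from a test (names are illustrative only):
+//
+//	stopCh := make(chan struct{})
+//	defer close(stopCh)
+//	if err := promInstance.PortForward(restConfig, stopCh); err != nil {
+//		// handle the error
+//	}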
+func (ins *PrometheusInstance) PortForward(config *rest.Config, stopCh <-chan struct{}) error { + if ins.portForward { + panic("port forwarding already started") + } + + ins.portForward = true + + ports := []string{fmt.Sprintf("%d:%d", PrometheusPortForwardPort, prometheusAPIPort)} + return PortForward(config, ins.podNamespace, ins.podName, ports, stopCh) +} + +func (ins *PrometheusInstance) getAPIClient() (v1.API, error) { + var endpoint string + if ins.portForward { + endpoint = fmt.Sprintf("http://localhost:%d", PrometheusPortForwardPort) + } else { + // on GKE, test runner VM can access the pod directly + endpoint = fmt.Sprintf("http://%s:%d", ins.podIP, prometheusAPIPort) + } + + cfg := api.Config{ + Address: fmt.Sprintf("%s", endpoint), + } + + c, err := api.NewClient(cfg) + if err != nil { + return nil, err + } + + return v1.NewAPI(c), nil +} + +func (ins *PrometheusInstance) ensureAPIClient() error { + if ins.apiClient == nil { + api, err := ins.getAPIClient() + if err != nil { + return fmt.Errorf("failed to get Prometheus API client: %w", err) + } + ins.apiClient = api + } + + return nil +} + +// Query sends a query to Prometheus. +func (ins *PrometheusInstance) Query(query string) (model.Value, error) { + ctx, cancel := context.WithTimeout(context.Background(), ins.queryTimeout) + defer cancel() + + return ins.QueryWithCtx(ctx, query) +} + +// QueryWithCtx sends a query to Prometheus with the specified context. +func (ins *PrometheusInstance) QueryWithCtx(ctx context.Context, query string) (model.Value, error) { + if err := ins.ensureAPIClient(); err != nil { + return nil, err + } + + result, warnings, err := ins.apiClient.Query(ctx, query, time.Time{}) + if err != nil { + return nil, fmt.Errorf("failed to query Prometheus: %w", err) + } + + if len(warnings) > 0 { + slog.Info( + "Prometheus query returned warnings", + "query", query, + "warnings", warnings, + ) + } + + return result, nil +} + +// QueryRange sends a range query to Prometheus. +func (ins *PrometheusInstance) QueryRange(query string, promRange v1.Range) (model.Value, error) { + ctx, cancel := context.WithTimeout(context.Background(), ins.queryTimeout) + defer cancel() + + return ins.QueryRangeWithCtx(ctx, query, promRange) +} + +// QueryRangeWithCtx sends a range query to Prometheus with the specified context. +func (ins *PrometheusInstance) QueryRangeWithCtx(ctx context.Context, query string, promRange v1.Range) (model.Value, error) { + if err := ins.ensureAPIClient(); err != nil { + return nil, err + } + + result, warnings, err := ins.apiClient.QueryRange(ctx, query, promRange) + if err != nil { + return nil, fmt.Errorf("failed to query Prometheus: %w", err) + } + + if len(warnings) > 0 { + slog.Info( + "Prometheus range query returned warnings", + "query", query, + "range", promRange, + "warnings", warnings, + ) + } + + return result, nil +} + +// GetFirstValueOfPrometheusVector returns the first value of a Prometheus vector. +func GetFirstValueOfPrometheusVector(val model.Value) (float64, error) { + res, ok := val.(model.Vector) + if !ok { + return 0, fmt.Errorf("expected a vector, got %T", val) + } + + if len(res) == 0 { + return 0, errors.New("empty vector") + } + + return float64(res[0].Value), nil +} + +// WritePrometheusMatrixToCSVFile writes a Prometheus matrix to a CSV file. 
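+// Each sample is written as a "<unix timestamp>,<value>" row; no header row is emitted,
+// and all series in the matrix are appended one after another.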
+func WritePrometheusMatrixToCSVFile(fileName string, value model.Value) error { + file, err := os.Create(fileName) + if err != nil { + return err + } + defer file.Close() + + csvWriter := csv.NewWriter(file) + + matrix, ok := value.(model.Matrix) + if !ok { + return fmt.Errorf("expected a matrix, got %T", value) + } + + for _, sample := range matrix { + for _, pair := range sample.Values { + record := []string{fmt.Sprint(pair.Timestamp.Unix()), pair.Value.String()} + if err := csvWriter.Write(record); err != nil { + return err + } + } + } + + csvWriter.Flush() + + return nil +} diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index fdc996c5d6..3cc73d08a8 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -27,9 +27,12 @@ import ( "fmt" "io" "net/http" + "reflect" "strings" "time" + "k8s.io/client-go/util/retry" + apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -71,7 +74,17 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { defer cancel() for _, resource := range resources { - if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), resource); err != nil { + var obj client.Object + + unstructuredObj, ok := resource.(*unstructured.Unstructured) + if ok { + obj = unstructuredObj.DeepCopy() + } else { + t := reflect.TypeOf(resource).Elem() + obj = reflect.New(t).Interface().(client.Object) + } + + if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), obj); err != nil { if !apierrors.IsNotFound(err) { return fmt.Errorf("error getting resource: %w", err) } @@ -83,7 +96,16 @@ func (rm *ResourceManager) Apply(resources []client.Object) error { continue } - if err := rm.K8sClient.Update(ctx, resource); err != nil { + // Some tests modify resources that are also modified by NGF (to update their status), so conflicts are possible + // For example, a Gateway resource. + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := rm.K8sClient.Get(ctx, client.ObjectKeyFromObject(resource), obj); err != nil { + return err + } + resource.SetResourceVersion(obj.GetResourceVersion()) + return rm.K8sClient.Update(ctx, resource) + }) + if err != nil { return fmt.Errorf("error updating resource: %w", err) } } @@ -112,8 +134,16 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro return nil } - obj.SetResourceVersion(fetchedObj.GetResourceVersion()) - if err := rm.K8sClient.Update(ctx, &obj); err != nil { + // Some tests modify resources that are also modified by NGF (to update their status), so conflicts are possible + // For example, a Gateway resource. + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + if err := rm.K8sClient.Get(ctx, nsName, fetchedObj); err != nil { + return err + } + obj.SetResourceVersion(fetchedObj.GetResourceVersion()) + return rm.K8sClient.Update(ctx, &obj) + }) + if err != nil { return fmt.Errorf("error updating resource: %w", err) } @@ -137,7 +167,39 @@ func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.Dele return nil } -// DeleteFromFile deletes Kubernetes resources defined within the provided YAML files. 
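+// DeleteNamespace deletes the specified namespace, if it exists, and waits for the deletion
+// to complete.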
+func (rm *ResourceManager) DeleteNamespace(name string) error { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.DeleteNamespaceTimeout) + defer cancel() + + ns := &core.Namespace{} + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Name: name}, ns); err != nil { + if apierrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("error getting namespace: %w", err) + } + + if err := rm.K8sClient.Delete(ctx, ns); err != nil { + return fmt.Errorf("error deleting namespace: %w", err) + } + + // Because the namespace deletion is asynchronous, we need to wait for the namespace to be deleted. + return wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Name: name}, ns); err != nil { + if apierrors.IsNotFound(err) { + return true, nil + } + return false, fmt.Errorf("error getting namespace: %w", err) + } + return false, nil + }) +} + +// DeleteFromFiles deletes Kubernetes resources defined within the provided YAML files. func (rm *ResourceManager) DeleteFromFiles(files []string, namespace string) error { handlerFunc := func(obj unstructured.Unstructured) error { obj.SetNamespace(namespace) @@ -241,7 +303,13 @@ func (rm *ResourceManager) WaitForAppsToBeReady(namespace string) error { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.CreateTimeout) defer cancel() - if err := rm.waitForPodsToBeReady(ctx, namespace); err != nil { + return rm.WaitForAppsToBeReadyWithCtx(ctx, namespace) +} + +// WaitForAppsToBeReadyWithCtx waits for all apps in the specified namespace to be ready or +// until the provided context is cancelled. +func (rm *ResourceManager) WaitForAppsToBeReadyWithCtx(ctx context.Context, namespace string) error { + if err := rm.WaitForPodsToBeReady(ctx, namespace); err != nil { return err } @@ -252,7 +320,9 @@ func (rm *ResourceManager) WaitForAppsToBeReady(namespace string) error { return rm.waitForGatewaysToBeReady(ctx, namespace) } -func (rm *ResourceManager) waitForPodsToBeReady(ctx context.Context, namespace string) error { +// WaitForPodsToBeReady waits for all Pods in the specified namespace to be ready or +// until the provided context is cancelled. +func (rm *ResourceManager) WaitForPodsToBeReady(ctx context.Context, namespace string) error { return wait.PollUntilContextCancel( ctx, 500*time.Millisecond, @@ -447,6 +517,37 @@ func (rm *ResourceManager) GetPodNames(namespace string, labels client.MatchingL return names, nil } +// GetPods returns all Pods in the specified namespace that match the given labels. +func (rm *ResourceManager) GetPods(namespace string, labels client.MatchingLabels) ([]core.Pod, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var podList core.PodList + if err := rm.K8sClient.List( + ctx, + &podList, + client.InNamespace(namespace), + labels, + ); err != nil { + return nil, fmt.Errorf("error getting list of Pods: %w", err) + } + + return podList.Items, nil +} + +// GetPod returns the Pod in the specified namespace with the given name. 
+func (rm *ResourceManager) GetPod(namespace, name string) (*core.Pod, error) { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) + defer cancel() + + var pod core.Pod + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &pod); err != nil { + return nil, fmt.Errorf("error getting Pod: %w", err) + } + + return &pod, nil +} + // GetPodLogs returns the logs from the specified Pod func (rm *ResourceManager) GetPodLogs(namespace, name string, opts *core.PodLogOptions) (string, error) { ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.GetTimeout) @@ -494,6 +595,24 @@ func (rm *ResourceManager) GetNGFDeployment(namespace, releaseName string) (*app return &deployment, nil } +// ScaleDeployment scales the Deployment to the specified number of replicas. +func (rm *ResourceManager) ScaleDeployment(namespace, name string, replicas int32) error { + ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.UpdateTimeout) + defer cancel() + + var deployment apps.Deployment + if err := rm.K8sClient.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, &deployment); err != nil { + return fmt.Errorf("error getting Deployment: %w", err) + } + + deployment.Spec.Replicas = &replicas + if err := rm.K8sClient.Update(ctx, &deployment); err != nil { + return fmt.Errorf("error updating Deployment: %w", err) + } + + return nil +} + // GetReadyNGFPodNames returns the name(s) of the NGF Pod(s). func GetReadyNGFPodNames( k8sClient client.Client, diff --git a/tests/framework/results.go b/tests/framework/results.go index 7d5b8ad2ee..87d363b080 100644 --- a/tests/framework/results.go +++ b/tests/framework/results.go @@ -1,6 +1,7 @@ package framework import ( + "encoding/csv" "fmt" "io" "os" @@ -64,24 +65,48 @@ func WriteSystemInfoToFile(file *os.File, ci ClusterInfo, plus bool) error { return nil } -// GeneratePNG generates a PNG using gnuplot. -func GeneratePNG(resultsDir, inputFilename, outputFilename string) ([]byte, error) { +func generatePNG(resultsDir, inputFilename, outputFilename, configFilename string) error { pwd, err := os.Getwd() if err != nil { - return nil, err + return err } - gnuplotCfg := filepath.Join(filepath.Dir(pwd), "scripts", "requests-plot.gp") + gnuplotCfg := filepath.Join(filepath.Dir(pwd), "scripts", configFilename) files := fmt.Sprintf("inputfile='%s';outputfile='%s'", inputFilename, outputFilename) cmd := exec.Command("gnuplot", "-e", files, "-c", gnuplotCfg) cmd.Dir = resultsDir - return cmd.CombinedOutput() + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to generate PNG: %w; output: %s", err, string(output)) + } + + return nil +} + +// GenerateRequestsPNG generates a Requests PNG using gnuplot. +func GenerateRequestsPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "requests-plot.gp") +} + +// GenerateTTRPNG generates a TTR PNG using gnuplot. +func GenerateTTRPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "ttr-plot.gp") +} + +// GenerateCPUPNG generates a CPU usage PNG using gnuplot. +func GenerateCPUPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "cpu-plot.gp") } -// WriteResults writes the vegeta metrics results to the results file in text format. 
-func WriteResults(resultsFile *os.File, metrics *Metrics) error { +// GenerateMemoryPNG generates a Memory usage PNG using gnuplot. +func GenerateMemoryPNG(resultsDir, inputFilename, outputFilename string) error { + return generatePNG(resultsDir, inputFilename, outputFilename, "memory-plot.gp") +} + +// WriteMetricsResults writes the metrics results to the results file in text format. +func WriteMetricsResults(resultsFile *os.File, metrics *Metrics) error { reporter := vegeta.NewTextReporter(&metrics.Metrics) return reporter.Report(resultsFile) @@ -96,7 +121,27 @@ func WriteContent(resultsFile *os.File, content string) error { return nil } -// NewCSVEncoder returns a vegeta CSV encoder. -func NewCSVEncoder(w io.Writer) vegeta.Encoder { +// NewVegetaCSVEncoder returns a vegeta CSV encoder. +func NewVegetaCSVEncoder(w io.Writer) vegeta.Encoder { return vegeta.NewCSVEncoder(w) } + +// NewCSVResultsWriter creates and returns a CSV results file and writer. +func NewCSVResultsWriter(resultsDir, fileName string, resultHeaders ...string) (*os.File, *csv.Writer, error) { + if err := os.MkdirAll(resultsDir, 0o750); err != nil { + return nil, nil, err + } + + file, err := os.OpenFile(filepath.Join(resultsDir, fileName), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o644) + if err != nil { + return nil, nil, err + } + + writer := csv.NewWriter(file) + + if err = writer.Write(resultHeaders); err != nil { + return nil, nil, err + } + + return file, writer, nil +} diff --git a/tests/framework/timeout.go b/tests/framework/timeout.go index 2aee2e5a21..2ed69457db 100644 --- a/tests/framework/timeout.go +++ b/tests/framework/timeout.go @@ -6,9 +6,15 @@ type TimeoutConfig struct { // CreateTimeout represents the maximum time for a Kubernetes object to be created. CreateTimeout time.Duration + // UpdateTimeout represents the maximum time for a Kubernetes object to be updated. + UpdateTimeout time.Duration + // DeleteTimeout represents the maximum time for a Kubernetes object to be deleted. DeleteTimeout time.Duration + // DeleteNamespaceTimeout represents the maximum time for a Kubernetes namespace to be deleted. + DeleteNamespaceTimeout time.Duration + // GetTimeout represents the maximum time to get a Kubernetes object. 
GetTimeout time.Duration @@ -29,7 +35,9 @@ type TimeoutConfig struct { func DefaultTimeoutConfig() TimeoutConfig { return TimeoutConfig{ CreateTimeout: 60 * time.Second, + UpdateTimeout: 60 * time.Second, DeleteTimeout: 10 * time.Second, + DeleteNamespaceTimeout: 60 * time.Second, GetTimeout: 10 * time.Second, ManifestFetchTimeout: 10 * time.Second, RequestTimeout: 10 * time.Second, diff --git a/tests/scale/results/1.0.0/1.0.0.md b/tests/results/scale/1.0.0/1.0.0.md similarity index 100% rename from tests/scale/results/1.0.0/1.0.0.md rename to tests/results/scale/1.0.0/1.0.0.md diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU-no-delay.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory-no-delay.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/results-no-delay.csv diff --git a/tests/scale/results/1.0.0/TestScale_HTTPRoutes/results.csv b/tests/results/scale/1.0.0/TestScale_HTTPRoutes/results.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPRoutes/results.csv rename to tests/results/scale/1.0.0/TestScale_HTTPRoutes/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/TTR.png rename to 
tests/results/scale/1.0.0/TestScale_HTTPSListeners/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_HTTPSListeners/results.csv b/tests/results/scale/1.0.0/TestScale_HTTPSListeners/results.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_HTTPSListeners/results.csv rename to tests/results/scale/1.0.0/TestScale_HTTPSListeners/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.0.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.0.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.0.0/TestScale_Listeners/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.0.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.0.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.0.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.0.0/TestScale_Listeners/results.csv b/tests/results/scale/1.0.0/TestScale_Listeners/results.csv similarity index 100% rename from tests/scale/results/1.0.0/TestScale_Listeners/results.csv rename to tests/results/scale/1.0.0/TestScale_Listeners/results.csv diff --git a/tests/scale/results/1.0.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.0.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.0.0/TestScale_UpstreamServers/CPU.png diff --git a/tests/scale/results/1.0.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.0.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.0.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.0.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.1.0/1.1.0.md b/tests/results/scale/1.1.0/1.1.0.md similarity index 100% rename from tests/scale/results/1.1.0/1.1.0.md rename to tests/results/scale/1.1.0/1.1.0.md diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU-no-delay.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory-no-delay.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/Memory.png diff --git 
a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/results-no-delay.csv diff --git a/tests/scale/results/1.1.0/TestScale_HTTPRoutes/results.csv b/tests/results/scale/1.1.0/TestScale_HTTPRoutes/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPRoutes/results.csv rename to tests/results/scale/1.1.0/TestScale_HTTPRoutes/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/TTR.png rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_HTTPSListeners/results.csv b/tests/results/scale/1.1.0/TestScale_HTTPSListeners/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_HTTPSListeners/results.csv rename to tests/results/scale/1.1.0/TestScale_HTTPSListeners/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.1.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.1.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.1.0/TestScale_Listeners/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.1.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.1.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.1.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.1.0/TestScale_Listeners/results.csv b/tests/results/scale/1.1.0/TestScale_Listeners/results.csv similarity index 100% rename from tests/scale/results/1.1.0/TestScale_Listeners/results.csv rename to tests/results/scale/1.1.0/TestScale_Listeners/results.csv diff --git a/tests/scale/results/1.1.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.1.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.1.0/TestScale_UpstreamServers/CPU.png diff --git 
a/tests/scale/results/1.1.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.1.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.1.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.1.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.2.0/1.2.0.md b/tests/results/scale/1.2.0/1.2.0.md similarity index 100% rename from tests/scale/results/1.2.0/1.2.0.md rename to tests/results/scale/1.2.0/1.2.0.md diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR-without-peak.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPRoutes_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners/TTR.png diff --git 
a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png rename to tests/results/scale/1.2.0/TestScale_HTTPSListeners_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/CPU.png b/tests/results/scale/1.2.0/TestScale_Listeners/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/CPU.png rename to tests/results/scale/1.2.0/TestScale_Listeners/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/Memory.png b/tests/results/scale/1.2.0/TestScale_Listeners/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/Memory.png rename to tests/results/scale/1.2.0/TestScale_Listeners/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners/TTR.png b/tests/results/scale/1.2.0/TestScale_Listeners/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners/TTR.png rename to tests/results/scale/1.2.0/TestScale_Listeners/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners_Plus/CPU.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_Listeners_Plus/TTR.png b/tests/results/scale/1.2.0/TestScale_Listeners_Plus/TTR.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_Listeners_Plus/TTR.png rename to tests/results/scale/1.2.0/TestScale_Listeners_Plus/TTR.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers/CPU.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers/CPU.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers/Memory.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers/Memory.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers/Memory.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/CPU.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/CPU.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/CPU.png rename to 
tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/CPU.png diff --git a/tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/Memory.png b/tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/Memory.png similarity index 100% rename from tests/scale/results/1.2.0/TestScale_UpstreamServers_Plus/Memory.png rename to tests/results/scale/1.2.0/TestScale_UpstreamServers_Plus/Memory.png diff --git a/tests/scale/manifests/prom-clusterrole.yaml b/tests/scale/manifests/prom-clusterrole.yaml deleted file mode 100644 index f8aefdd36e..0000000000 --- a/tests/scale/manifests/prom-clusterrole.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: prom ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: prometheus - namespace: prom ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: prometheus - namespace: prom -rules: -- apiGroups: [""] - resources: - - nodes - - services - - endpoints - - pods - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: - - configmaps - verbs: ["get"] -- nonResourceURLs: ["/metrics"] - verbs: ["get"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: prometheus - namespace: prom -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: prometheus -subjects: -- kind: ServiceAccount - name: prometheus - namespace: prom diff --git a/tests/scale/scale.md b/tests/scale/scale.md deleted file mode 100644 index 328378596c..0000000000 --- a/tests/scale/scale.md +++ /dev/null @@ -1,527 +0,0 @@ -# Scale Tests - -This document describes how we scale test NGF. - - -- [Scale Tests](#scale-tests) - - [Goals](#goals) - - [Test Environment](#test-environment) - - [Steps](#steps) - - [Setup](#setup) - - [Run the tests](#run-the-tests) - - [Scale Listeners to Max of 64](#scale-listeners-to-max-of-64) - - [Scale HTTPS Listeners to Max of 64](#scale-https-listeners-to-max-of-64) - - [Scale HTTPRoutes](#scale-httproutes) - - [Scale Upstream Servers](#scale-upstream-servers) - - [Scale HTTP Matches](#scale-http-matches) - - [Analyze](#analyze) - - [Results](#results) - - -## Goals - -- Measure how NGF performs when the number of Gateway API and referenced core Kubernetes resources are scaled. -- Test the following number of resources: - - Max number of HTTP and HTTPS Listeners (64) - - Max number of Upstream Servers (648) - - Max number of HTTPMatches - - 1000 HTTPRoutes - -## Test Environment - -For most of the tests, the following cluster will be sufficient: - -- A Kubernetes cluster with 4 nodes on GKE - - Node: n2d-standard-8 (8 vCPU, 32GB memory) - - Enabled GKE logging -- A GKE VM with access to the cluster. Send all requests from the GKE VM. - -The Upstream Server scale test requires a bigger cluster to accommodate the large number of Pods. Those cluster details -are listed in the [Scale Upstream Servers](#scale-upstream-servers) test steps. 
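The node shape above can be requested through the `GKE_MACHINE_TYPE` and `GKE_NUM_NODES` variables that this patch introduces in `tests/scripts/create-gke-cluster.sh` and `tests/scripts/vars.env-example`; the export values below are only an illustrative sketch matching the 4-node n2d-standard-8 cluster described here, not part of the patch itself:

```console
# Illustrative values only: approximate the documented 4-node n2d-standard-8 cluster
# before invoking tests/scripts/create-gke-cluster.sh (variables added by this patch).
export GKE_MACHINE_TYPE=n2d-standard-8
export GKE_NUM_NODES=4
```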
- -## Steps - -### Setup - -- Update the version in the [test](scale_test.go) to the release version - -- Install Gateway API Resources: - - ```console - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.1.0/standard-install.yaml - ``` - -- Install edge NGF: - - For OSS: - - ```console - helm install scale-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --create-namespace --wait -n nginx-gateway --version=0.0.0-edge --set nginxGateway.config.logging.level=debug - ``` - - For Plus: - - - Build the NGINX Plus image - - Push the image to the GCR registry - - ```console - helm install scale-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --create-namespace --wait -n nginx-gateway --version=0.0.0-edge --set nginxGateway.config.logging.level=debug --set nginx.plus=true --set nginx.image.repository= --set nginx.image.tag= - ``` - -- Save the Pod Name and LoadBalancer IP for tests: - - ```console - export NGF_IP=$(kubectl get svc -n nginx-gateway scale-test-nginx-gateway-fabric --output jsonpath='{.status.loadBalancer.ingress[0].ip}') - export NGF_POD=$(kubectl get pods -n nginx-gateway -l "app.kubernetes.io/name=nginx-gateway-fabric,app.kubernetes.io/instance=scale-test" -o jsonpath="{.items[0].metadata.name}") - ``` - -- Install Prometheus: - - ```console - kubectl apply -f manifests/prom-clusterrole.yaml - helm repo add prometheus-community https://prometheus-community.github.io/helm-charts - helm repo update - helm install prom prometheus-community/prometheus --set useExistingClusterRoleName=prometheus -n prom - ``` - -- Create a directory under [results](/tests/scale/results) and name it after the version of NGF you are testing. Then - create a file for the result summary, also named after the NGF version. For - example: [1.0.0.md](/tests/scale/results/1.0.0/1.0.0.md). - -### Run the tests - -Run the following tests for both N+ and OSS. - -#### Scale Listeners to Max of 64 - -Test Goal: Measure how NGF performs as the number of Listeners increases to the max of 64. - -Test Plan: - -- Scale up to 64 HTTP Listeners -- All Listeners are on a single Gateway -- Each Listener has 1 HTTPRoute attached -- Each HTTPRoute references 1 unique Service -- Services and Deployments are created before scaling Listeners. -- After each Listener + HTTPRoute is created, measure the time it takes to get a successful response from the new - route (time to ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 64 Listeners -- 64 HTTPRoutes -- 64 Services, Deployments, Pods - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -run TestScale_Listeners -i 64 - ``` - - With Plus: - - ```console - go test -v -tags scale -run TestScale_Listeners -i 64 -plus - ``` - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_Listeners - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_Listeners - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTPS Listeners to Max of 64 - -Test Goal: Measure how NGF performs as the number of HTTPS Listeners increases to the max of 64. 
- -Test Plan: - -- Scale up to 64 HTTPS Listeners -- All Listeners are on a single Gateway -- Each Listener has 1 HTTPRoute attached -- Each Listener references a unique Secret -- Each HTTPRoute references 1 unique Service -- Services, Deployments, and Secrets are created before scaling Listeners -- After each Listener + HTTPRoute is created, measure the time it takes to get a successful response from the new - route (time to ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 64 HTTPS Listeners -- 64 Secrets -- 64 HTTPRoutes -- 64 Services, Deployments, Pods - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -run TestScale_HTTPSListeners -i 64 - ``` - - With Plus: - - ```console - go test -v -tags scale -run TestScale_HTTPSListeners -i 64 -plus - ``` - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_HTTPSListeners - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_HTTPSListeners - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTPRoutes - -Test Goal: Measure how NGF performs as the number of HTTPRoutes increases to 1000. - -Test Plan: - -- Scale up to 1000 HTTPRoutes -- All HTTPRoutes attach to a single Gateway with one Listener -- Each HTTPRoute references the same Service -- Service and Deployment are created before scaling HTTPRoutes -- After each HTTPRoute is created, measure the time it takes to get a successful response from the new route (time to - ready). -- Record the time to ready in seconds in a csv file for each iteration. - -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1000 HTTPRoutes -- 1 Service, Deployment, Pod - -Follow the steps below to run the test: - -- Run the test: - - With OSS: - - ```console - go test -v -tags scale -timeout 30m -run TestScale_HTTPRoutes -i 1000 - ``` - - With Plus: - - ```console - go test -v -tags scale -timeout 30m -run TestScale_HTTPRoutes -i 1000 -plus - ``` - - **Optional:** To test with a delay in between each new HTTPRoute, you can add the `-delay` flag to the above command. For example, - to add a 2-second delay: - - ```console - go test -v -tags scale -timeout 60m -run TestScale_HTTPRoutes -i 1000 -delay 2s - ``` - - The test takes longer to run with a delay so make sure to adjust the timeout value. - -- [Analyze](#analyze) the results. - -- Clean up: - - Delete resources from cluster: - - ```console - kubectl delete -Rf TestScale_HTTPRoutes - ``` - - Delete generated manifests: - - ```console - rm -rf TestScale_HTTPRoutes - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale Upstream Servers - -Test Goal: Measure how NGF performs as the number of Upstream Servers increases to the max of 648. - -Test Plan: - -- Deploy a single Gateway with 1 Listener and attach one HTTPRoute that references a single Service -- Scale the deployment for that Service to 648 Pods for OSS and 556 Pods for Plus (these are the limits that the upstream zone size allows) -- Gateway, HTTPRoute, Service, and Deployment with 1 replica are created before scaling up to 648 replicas. 
- -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1 HTTPRoutes -- 1 Service, 1 Deployment, 648 Pods - -Test Environment: - -For this test you must use a much bigger cluster in order to create 648 Pods. - -- A Kubernetes cluster with 12 nodes on GKE - - Node: n2d-standard-16 (16 vCPU, 64GB memory) - - Enabled GKE logging - -Follow the steps below to run the test: - -- Apply manifest - - ```console - kubectl apply -f manifests/scale-upstreams.yaml - ``` - -- Check the status of the Gateway and HTTPRoute to make sure everything is OK before scaling. - - ```console - kubectl describe gateway gateway - kubectl describe httproute route - ``` - -- Get the start time as a UNIX timestamp and record it in the results. - - ```console - date +%s - ``` - - This will be used in the metrics query. - -- Open a new terminal window and start the following loop: - - ```console - for i in $(seq 1 150); do curl --resolve cafe.example.com:80:$NGF_IP http://cafe.example.com:80/; sleep 1; done >> requests.log - ``` - -- Back in your original terminal, scale the backend app: - - For OSS: - - ```console - kubectl scale deploy backend --replicas 648 - ``` - - For Plus: - - ```console - kubectl scale deploy backend --replicas 556 - ``` - -- Wait for all Pods to become available: - - ```console - watch kubectl get deploy backend - ``` - -- Check the NGINX config for 648 upstream servers: - - ```console - kubectl exec -it -n nginx-gateway $NGF_POD -c nginx -- nginx -T | grep -E "server (?:[0-9]{1,3}\.){3}[0-9]{1,3}:8080" | wc -l - ``` - -- Get the end time as a UNIX timestamp and make a note of it: - - ```console - date +%s - ``` - -- In the terminal you started the request loop, kill the loop if it's still running and check the request.log to see if - any of the requests failed. Record any failures in the results file. - -- [Analyze](#analyze) the results. Use the start time and end time you made note of earlier for the - queries. You can calculate the test duration in seconds by subtracting the start time from the end time. - -- Clean up: - - ```console - kubectl delete -f manifests/scale-upstreams.yaml - ``` - -- Check for any errors or restarts after cleanup. -- Check NGINX conf to make sure it looks correct. - -#### Scale HTTP Matches - -Test Goal: Find the difference in latency between the first match and last match for the max length of -the `http_matches` variable. - -Test Plan: - -- Deploy a single Gateway with 1 Listener and attach one HTTPRoute that references a single Service -- Within the HTTPRoute configure the max number of matches (max is determined by the length of the - generated `http_matches` variable (4096 characters)) -- Use `wrk` to send requests to the _first_ match in `http_matches` list and measure the latency -- Use `wrk` to send requests to the _last_ match in `http_matches` list and measure the latency - -Total Resources Created: - -- 1 Gateway with 1 Listener -- 1 HTTPRoute with 7 rules and 50 matches -- 1 Service, 1 Deployment, 1 Pod - -Follow these steps to run the test: - -- Download [wrk](https://github.com/wg/wrk) - -- Apply manifest: - - ```console - kubectl apply -f manifests/scale-matches.yaml - ``` - -- Check the status of the Gateway and HTTPRoute to make sure everything is OK before scaling. - - ```console - kubectl describe gateway gateway - kubectl describe httproute route - ``` - -- Edit your /etc/hosts file and add an entry for "NGF_IP cafe.example.com". 
- -- Test the first match: - - ```console - wrk -t2 -c10 -d30 http://cafe.example.com -H "header-1: header-1-val" - ``` - -- Test the last match: - - ```console - wrk -t2 -c10 -d30 http://cafe.example.com -H "header-50: header-50-val" - ``` - -- Copy and paste the results into the results file. - -- Clean up: - - ```console - kubectl delete -f manifests/scale-matches.yaml - ``` - -### Analyze - -- Query Prometheus for reload metrics. To access the Prometheus Server, run: - - ```console - export POD_NAME=$(kubectl get pods --namespace prom -l "app.kubernetes.io/name=prometheus,app.kubernetes.io/instance=prom" -o jsonpath="{.items[0].metadata.name}") - kubectl --namespace prom port-forward $POD_NAME 9090 - ``` - - To query Prometheus, you can either browse to localhost:9090 or use curl. The following instructions assume you are - using the prom GUI. - - > Note: - > For the tests that write to a csv file, the `Test Start`, `Test End + 10s`, and `Duration` are at the - > end of the results.csv file in the `results//` directory. - > If you ran the tests with the `plus` flag, the test directory is named `results//_Plus`. - > We are using `Test End + 10s` in the Prometheus query to account for the 10s scraping interval. - - Total number of reloads: - - ```console - nginx_gateway_fabric_nginx_reloads_total - nginx_gateway_fabric_nginx_reloads_total @ - ``` - - Total number of reload errors: - - ```console - nginx_gateway_fabric_nginx_reload_errors_total - nginx_gateway_fabric_nginx_reload_errors_total @ - ``` - - Average reload time (ms): - - ```console - rate(nginx_gateway_fabric_nginx_reloads_milliseconds_sum[] @ ) / - rate(nginx_gateway_fabric_nginx_reloads_milliseconds_count[] @ ) - ``` - - Reload Time Distribution: - - ```console - nginx_gateway_fabric_nginx_reloads_milliseconds_bucket - nginx_gateway_fabric_nginx_reloads_milliseconds_bucket @ - ``` - - Total number of event batches processed: - - ```console - nginx_gateway_fabric_event_batch_processing_milliseconds_count - nginx_gateway_fabric_event_batch_processing_milliseconds_count @ - ``` - - Average event batch processing time (ms): - - ```console - rate(nginx_gateway_fabric_event_batch_processing_milliseconds_sum[] @ ) / - rate(nginx_gateway_fabric_event_batch_processing_milliseconds_count[] @ ) - ``` - - Event Batch Processing Time Distribution: - - ```console - nginx_gateway_fabric_event_batch_processing_milliseconds_bucket - nginx_gateway_fabric_event_batch_processing_milliseconds_bucket @ - ``` - - Record these numbers in a table in the results file. - -- Take screenshots of memory and CPU usage in GKE Dashboard - - To Monitor memory and CPU usage, navigate to the Kubernetes Engine > Workloads > Filter by `nginx-gateway` namespace > - click on NGF Pod name. You should see graphs for CPU, Memory, and Disk. - - - Convert the `Start Time` and `End Time` UNIX timestamps to your local date time: - - ```console - date -r - ``` - - - Create a custom time frame for the graphs in GKE. - - Take a screenshot of the CPU and Memory graphs individually. Store them in the `results//` - directory. - -- If the test writes time to ready numbers to a csv, create a time to ready graph. - - Use https://chart-studio.plotly.com/create/#/ to plot the time to ready numbers on a graph. - - Remove the `"Test Start", "Test End", "Test End + 10s", "Duration"` rows from the bottom of the csv. - - Upload the csv file to plotly. - - Create a new `Trace`, select `Line` as the type. - - Set the Y axis to the Time to Ready column. 
- - Set the X axis to the number of resources column. - - Label the graph and take a screenshot. - - Store the graph in the `results//` directory. - -- Check for errors or restarts and record in the results file. File a bug if there's unexpected errors or restarts. -- Check NGINX conf and make sure it looks correct. File a bug if there is an issue. - -### Results - -- [1.0.0](/tests/scale/results/1.0.0/1.0.0.md) -- [1.1.0](/tests/scale/results/1.1.0/1.1.0.md) -- [1.2.0](/tests/scale/results/1.2.0/1.2.0.md) diff --git a/tests/scale/scale_test.go b/tests/scale/scale_test.go deleted file mode 100644 index 89f87a1d82..0000000000 --- a/tests/scale/scale_test.go +++ /dev/null @@ -1,259 +0,0 @@ -//go:build scale -// +build scale - -package scale - -import ( - "context" - "crypto/tls" - "encoding/csv" - "flag" - "fmt" - "net/http" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - "testing" - "time" - - "k8s.io/apimachinery/pkg/util/wait" -) - -// testing flags -var ( - numIterations = flag.Int("i", 1, "number of times to scale the resource") - delay = flag.Duration("delay", 0, "delay between each scaling iteration") - version = flag.String("version", "1.2.0", "version of NGF under test") - plus = flag.Bool("plus", false, "nginx-plus enabled") -) - -func TestScale_Listeners(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("http://%s/", ip) - - runScaleTest( - t, - []string{"# Listeners", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleListenerManifests(*numIterations, dir, false /*non-tls*/) - }, - url, - ) -} - -func TestScale_HTTPSListeners(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("https://%s/", ip) - - runScaleTest( - t, - []string{"# HTTPS Listeners", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleListenerManifests(*numIterations, dir, true /*tls*/) - }, - url, - ) -} - -func TestScale_HTTPRoutes(t *testing.T) { - ip := getIP(t) - url := fmt.Sprintf("http://%s/", ip) - - runScaleTest( - t, - []string{"# HTTPRoutes", "Time to Ready (s)", "Error"}, - func(dir string) error { - return generateScaleHTTPRouteManifests(*numIterations, dir) - }, - url, - ) -} - -func runScaleTest( - t *testing.T, - resultHeaders []string, - generateManifests func(dir string) error, - url string, -) { - t.Helper() - manifestDir := t.Name() - - writer := newResultsWriter(t, t.Name(), resultHeaders...) 
- - if err := generateManifests(manifestDir); err != nil { - t.Fatalf("failed to generate manifests: %s", err) - } - - startTime := time.Now() - startUnix := fmt.Sprintf("%d", startTime.Unix()) - - if err := kubectlApply(getPrereqDirName(manifestDir)); err != nil { - t.Fatalf("failed to apply prerequisite resources: %s", err) - } - - t.Log("Waiting for all Pods to be Ready") - if err := kubectlWaitAllPodsReady(); err != nil { - t.Fatalf("failed to wait for all Pods to be Ready: %s", err) - } - - for i := 0; i < *numIterations; i++ { - t.Logf("Scaling up to %d resources", i) - - manifestFile := filepath.Join(manifestDir, fmt.Sprintf("manifest-%d.yaml", i)) - - if err := kubectlApply(manifestFile); err != nil { - t.Errorf("failed to scale up: %s", err) - } - - host := fmt.Sprintf("%d.example.com", i) - - t.Logf("Sending request to url %s with host %s...", url, host) - - ttr, err := waitForResponseForHost(url, host) - - seconds := ttr.Seconds() - record := []string{strconv.Itoa(i + 1), strconv.FormatFloat(seconds, 'f', -1, 64)} - if err != nil { - record = append(record, err.Error()) - } - - if err = writer.Write(record); err != nil { - t.Fatalf("failed to write time to ready to csv file: %s", err) - } - - time.Sleep(*delay) - } - - endTime := time.Now() - endUnix := fmt.Sprintf("%d", endTime.Unix()) - - // This accounts for prometheus 10s scraping window - endUnixPlusTen := fmt.Sprintf("%d", endTime.Add(10*time.Second).Unix()) - - records := [][]string{ - {"Test Start", "Test End", "Test End + 10s", "Duration"}, - {startUnix, endUnix, endUnixPlusTen, endTime.Sub(startTime).String()}, - } - - if err := writer.WriteAll(records); err != nil { - t.Logf("failed to write records to csv") - } -} - -func getIP(t *testing.T) string { - t.Helper() - - ip := os.Getenv("NGF_IP") - if ip == "" { - t.Fatalf("NGF_IP env var not set") - } - - return ip -} - -func newResultsWriter(t *testing.T, testName string, resultHeaders ...string) *csv.Writer { - t.Helper() - - versionDir := filepath.Join("results", *version) - if err := os.Mkdir(versionDir, 0o750); err != nil && !os.IsExist(err) { - t.Fatalf("failed to create results version directory: %s", err) - } - - testDirName := testName - if *plus { - testDirName += "_Plus" - } - - dir := filepath.Join(versionDir, testDirName) - if err := os.Mkdir(dir, 0o750); err != nil { - t.Fatalf("failed to create results test directory: %s", err) - } - - file, err := os.Create(filepath.Join(dir, "results.csv")) - if err != nil { - t.Fatalf("failed to create results csv file: %s", err) - } - - writer := csv.NewWriter(file) - - if err = writer.Write(resultHeaders); err != nil { - t.Fatalf("failed to write headers to csv file: %s", err) - } - - t.Cleanup(func() { - writer.Flush() - _ = file.Close() - }) - - return writer -} - -func kubectlApply(filename string) error { - if err := kubectlExec("apply", "-f", filename); err != nil { - return fmt.Errorf("error applying %s: %w", filename, err) - } - - return nil -} - -func kubectlWaitAllPodsReady() error { - if err := kubectlExec("wait", "pod", "--all", "--for=condition=Ready"); err != nil { - return fmt.Errorf("error waiting for all pods to be ready:%w", err) - } - - return nil -} - -func kubectlExec(arg ...string) error { - cmd := exec.Command("kubectl", arg...) 
- - return cmd.Run() -} - -func waitForResponseForHost(url, host string) (time.Duration, error) { - client := &http.Client{} - - if strings.HasPrefix(url, "https") { - customTransport := http.DefaultTransport.(*http.Transport) - customTransport.TLSClientConfig = &tls.Config{ - InsecureSkipVerify: true, // nolint: gosec - ServerName: host, - } - client.Transport = customTransport - } - - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() - - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) - if err != nil { - return 0, err - } - - req.Host = host - - start := time.Now() - - err = wait.PollUntilContextCancel( - ctx, - 200*time.Millisecond, - true, - func(ctx context.Context) (done bool, err error) { - resp, err := client.Do(req) - if err != nil { - fmt.Println("Retrying GET request", "error", err) - return false, err - } - - if resp.StatusCode == http.StatusOK { - return true, nil - } - - fmt.Println("Retrying GET request", "host", host, "status", resp.Status) - return false, nil - }) - - return time.Since(start), err -} diff --git a/tests/scripts/cpu-plot.gp b/tests/scripts/cpu-plot.gp new file mode 100644 index 0000000000..71584811a3 --- /dev/null +++ b/tests/scripts/cpu-plot.gp @@ -0,0 +1,20 @@ +set terminal png size 800,600 +set title "CPU Usage" +set datafile separator "," +set output outputfile . "" + +# X-axis settings +set xlabel "Timestamp" +set xdata time +set timefmt "%s" +set format x "%M:%S" +set xrange [*:*] +set xtics nomirror + +# Y-axis settings +set yrange [0:*] +set ylabel "CPU Usage (core seconds)" +set format y "%.2f" + +# Plotting data +plot inputfile using 1:2 with lines lw 2 notitle diff --git a/tests/scripts/create-gke-cluster.sh b/tests/scripts/create-gke-cluster.sh index 9d034e1c62..b5a2061f8b 100644 --- a/tests/scripts/create-gke-cluster.sh +++ b/tests/scripts/create-gke-cluster.sh @@ -6,6 +6,16 @@ ip_random_digit=$((1 + $RANDOM % 250)) IS_CI=${1:-false} +if [ -z "$GKE_MACHINE_TYPE" ]; then + # If the environment variable is not set, use a default value + GKE_MACHINE_TYPE="e2-medium" +fi + +if [ -z "$GKE_NUM_NODES" ]; then + # If the environment variable is not set, use a default value + GKE_NUM_NODES="3" +fi + gcloud container clusters create ${GKE_CLUSTER_NAME} \ --project ${GKE_PROJECT} \ --zone ${GKE_CLUSTER_ZONE} \ @@ -16,7 +26,9 @@ gcloud container clusters create ${GKE_CLUSTER_NAME} \ --master-ipv4-cidr 172.16.${ip_random_digit}.32/28 \ --metadata=block-project-ssh-keys=TRUE \ --monitoring=SYSTEM,POD,DEPLOYMENT \ - --logging=SYSTEM,WORKLOAD + --logging=SYSTEM,WORKLOAD \ + --machine-type ${GKE_MACHINE_TYPE} \ + --num-nodes ${GKE_NUM_NODES} # Add current IP to GKE master control node access, if this script is not invoked during a CI run. if [ "${IS_CI}" = "false" ]; then diff --git a/tests/scripts/memory-plot.gp b/tests/scripts/memory-plot.gp new file mode 100644 index 0000000000..c25614db70 --- /dev/null +++ b/tests/scripts/memory-plot.gp @@ -0,0 +1,21 @@ +# Define a function to convert bytes to Mebibytes +bytes_to_MiB(bytes) = bytes / (1024.0 * 1024.0) + +set terminal png size 800,600 +set title "Memory Usage" +set datafile separator "," +set output outputfile . 
"" + +# X-axis settings +set xlabel "Timestamp" +set xdata time +set timefmt "%s" +set format x "%M:%S" +set xrange [*:*] # Specify a range covering all timestamps + +# Y-axis settings +set yrange [0:*] +set ylabel "Memory Usage (MiB)" + +# Plotting data +plot inputfile using 1:(bytes_to_MiB($2)) with lines lw 2 notitle diff --git a/tests/scripts/ttr-plot.gp b/tests/scripts/ttr-plot.gp new file mode 100644 index 0000000000..97d0eb2891 --- /dev/null +++ b/tests/scripts/ttr-plot.gp @@ -0,0 +1,18 @@ +set terminal png size 800,600 +set title "Scaling resources" +set datafile separator "," +set output outputfile . "" + +# X-axis settings +set xrange [0:70] +set xtics 10 +set xlabel "# Resources" +set grid xtics + +# Y-axis settings +set yrange [0:*] +set ylabel "Time to Ready (s)" +set format y "%.1f" + +# Plotting data +plot inputfile using 1:2 with lines lw 2 notitle diff --git a/tests/scripts/vars.env-example b/tests/scripts/vars.env-example index c4163b0120..52138f113f 100644 --- a/tests/scripts/vars.env-example +++ b/tests/scripts/vars.env-example @@ -20,3 +20,5 @@ SOURCE_IP_RANGE= PLUS_ENABLED= NGF_VERSION= +GKE_MACHINE_TYPE= +GKE_NUM_NODES= diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 743e79977c..3860133e79 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -77,7 +77,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), AfterAll(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) outFile.Close() }) @@ -97,7 +97,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("nfr", "performance"), } _, metrics := framework.RunLoadTest(cfg) - Expect(framework.WriteResults(outFile, &metrics)).To(Succeed()) + Expect(framework.WriteMetricsResults(outFile, &metrics)).To(Succeed()) _, err = fmt.Fprint(outFile, "```\n") Expect(err).ToNot(HaveOccurred()) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index bf6c61295b..b4148039e4 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -78,7 +78,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("nfr", "graceful-recov AfterAll(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) It("recovers when NGF container is restarted", func() { diff --git a/tests/suite/longevity_test.go b/tests/suite/longevity_test.go index ec60aa431e..2ddf66c442 100644 --- a/tests/suite/longevity_test.go +++ b/tests/suite/longevity_test.go @@ -81,7 +81,7 @@ var _ = Describe("Longevity", Label("longevity-setup", "longevity-teardown"), fu Expect(writeTrafficResults(resultsFile, homeDir, "tea.txt", "HTTPS")).To(Succeed()) Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) }) diff --git a/tests/scale/manifests/scale-matches.yaml b/tests/suite/manifests/scale/matches.yaml similarity index 100% rename from tests/scale/manifests/scale-matches.yaml rename to tests/suite/manifests/scale/matches.yaml diff --git a/tests/scale/manifests/scale-upstreams.yaml b/tests/suite/manifests/scale/upstreams.yaml 
similarity index 100% rename from tests/scale/manifests/scale-upstreams.yaml rename to tests/suite/manifests/scale/upstreams.yaml diff --git a/tests/suite/sample_test.go b/tests/suite/sample_test.go index 3996c67646..56d08713a6 100644 --- a/tests/suite/sample_test.go +++ b/tests/suite/sample_test.go @@ -34,7 +34,7 @@ var _ = Describe("Basic test example", Label("functional"), func() { AfterEach(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) }) It("sends traffic", func() { diff --git a/tests/suite/scale_test.go b/tests/suite/scale_test.go new file mode 100644 index 0000000000..8d7c187db7 --- /dev/null +++ b/tests/suite/scale_test.go @@ -0,0 +1,797 @@ +package suite + +import ( + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strconv" + "strings" + "text/template" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/common/model" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + ctlr "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +var _ = Describe("Scale test", Ordered, Label("nfr", "scale"), func() { + // One of the tests - scales upstream servers - requires a big cluster to provision 648 pods. + // On GKE, you can use the following configuration: + // - A Kubernetes cluster with 12 nodes on GKE + // - Node: n2d-standard-16 (16 vCPU, 64GB memory) + + var ( + matchesManifests = []string{ + "scale/matches.yaml", + } + upstreamsManifests = []string{ + "scale/upstreams.yaml", + } + + namespace = "scale" + + scrapeInterval = 15 * time.Second + queryRangeStep = 15 * time.Second + + resultsDir string + outFile *os.File + ngfPodName string + promInstance framework.PrometheusInstance + promPortForwardStopCh = make(chan struct{}) + ) + + const ( + httpListenerCount = 64 + httpsListenerCount = 64 + httpRouteCount = 1000 + upstreamServerCount = 648 + ) + + BeforeAll(func() { + // Scale tests need a dedicated NGF instance + // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. + teardown(releaseName) + + var err error + resultsDir, err = framework.CreateResultsDir("scale", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, framework.CreateResultsFilename("md", version, *plusEnabled)) + outFile, err = framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo, *plusEnabled)).To(Succeed()) + + promCfg := framework.PrometheusConfig{ + ScrapeInterval: scrapeInterval, + } + + promInstance, err = framework.InstallPrometheus(resourceManager, promCfg) + Expect(err).ToNot(HaveOccurred()) + + k8sConfig := ctlr.GetConfigOrDie() + + if !clusterInfo.IsGKE { + Expect(promInstance.PortForward(k8sConfig, promPortForwardStopCh)).To(Succeed()) + } + }) + + BeforeEach(func() { + // Scale tests need a dedicated NGF instance per test. + // Because they analyze the logs of NGF and NGINX, and they don't want to analyze the logs of other tests. 
+ cfg := getDefaultSetupCfg() + cfg.nfr = true + setup(cfg) + + ns := &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + } + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).To(HaveLen(1)) + ngfPodName = podNames[0] + }) + + createResponseChecker := func(url, address string) func() error { + return func() error { + status, _, err := framework.Get(url, address, timeoutConfig.RequestTimeout) + if err != nil { + return fmt.Errorf("bad response: %w", err) + } + + if status != 200 { + return fmt.Errorf("unexpected status code: %d", status) + } + + return nil + } + } + + createMetricExistChecker := func(query string, getTime func() time.Time, modifyTime func()) func() error { + return func() error { + queryWithTimestamp := fmt.Sprintf("%s @ %d", query, getTime().Unix()) + + result, err := promInstance.Query(queryWithTimestamp) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + modifyTime() + return errors.New("empty result") + } + + return nil + } + } + + createEndTimeFinder := func(query string, startTime time.Time, t *time.Time) func() error { + return func() error { + result, err := promInstance.QueryRange(query, v1.Range{ + Start: startTime, + End: *t, + Step: queryRangeStep, + }) + if err != nil { + return fmt.Errorf("failed to query Prometheus: %w", err) + } + + if result.String() == "" { + *t = time.Now() + return errors.New("empty result") + } + + return nil + } + } + + getFirstValueOfVector := func(query string) float64 { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + val, err := framework.GetFirstValueOfPrometheusVector(result) + Expect(err).ToNot(HaveOccurred()) + + return val + } + + getBuckets := func(query string) []bucket { + result, err := promInstance.Query(query) + Expect(err).ToNot(HaveOccurred()) + + res, ok := result.(model.Vector) + Expect(ok).To(BeTrue()) + + buckets := make([]bucket, 0, len(res)) + + for _, sample := range res { + le := sample.Metric["le"] + val := float64(sample.Value) + bucket := bucket{ + Le: string(le), + Val: int(val), + } + buckets = append(buckets, bucket) + } + + return buckets + } + + checkLogErrors := func( + containerName string, + substrings []string, + ignoredSubstrings []string, + fileName string, + ) int { + logs, err := resourceManager.GetPodLogs(ngfNamespace, ngfPodName, &core.PodLogOptions{ + Container: containerName, + }) + Expect(err).ToNot(HaveOccurred()) + + logLines := strings.Split(logs, "\n") + errors := 0 + + outer: + for _, line := range logLines { + for _, substr := range ignoredSubstrings { + if strings.Contains(line, substr) { + continue outer + } + } + for _, substr := range substrings { + if strings.Contains(line, substr) { + errors++ + continue outer + } + } + } + + // attach full logs + if errors > 0 { + f, err := os.Create(fileName) + Expect(err).ToNot(HaveOccurred()) + defer f.Close() + + _, err = io.WriteString(f, logs) + Expect(err).ToNot(HaveOccurred()) + } + return errors + } + + runTestWithMetricsAndLogs := func(testName, testResultsDir string, test func()) { + var ( + metricExistTimeout = 2 * time.Minute + metricExistPolling = 1 * time.Second + ) + + startTime := time.Now() + + // We need to make sure that for the startTime, the metrics exists in Prometheus. 
+ // if they don't exist, we increase the startTime and try again. + // Note: it's important that Polling interval in Eventually is greater than the startTime increment. + + getStartTime := func() time.Time { return startTime } + modifyStartTime := func() { startTime = startTime.Add(500 * time.Millisecond) } + + queries := []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + fmt.Sprintf(`container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + createMetricExistChecker( + q, + getStartTime, + modifyStartTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + + test() + + // We sleep for 2 scape intervals to ensure that Prometheus scrapes the metrics after the test() finishes + // before endTime, so that we don't lose any metric values like reloads. + time.Sleep(2 * scrapeInterval) + + endTime := time.Now() + + // Now we check that Prometheus has the metrics for the endTime + + // If the test duration is small (which can happen if you run the test with small number of resources), + // the rate query may not return any data. + // To ensure it returns data, we increase the startTime. + Eventually( + createEndTimeFinder( + fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), + startTime, + &endTime, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + + getEndTime := func() time.Time { return endTime } + noOpModifier := func() {} + + queries = []string{ + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + // We don't need to check all nginx_gateway_fabric_* metrics, as they are collected at the same time + fmt.Sprintf(`nginx_gateway_fabric_nginx_reloads_total{pod="%s"}`, ngfPodName), + } + + for _, q := range queries { + Eventually( + createMetricExistChecker( + q, + getEndTime, + noOpModifier, + ), + ).WithTimeout(metricExistTimeout).WithPolling(metricExistPolling).Should(Succeed()) + } + + // Collect metric values + // For some metrics, generate PNGs + + result, err := promInstance.QueryRange( + fmt.Sprintf(`container_memory_usage_bytes{pod="%s",container="nginx-gateway"}`, ngfPodName), + v1.Range{ + Start: startTime, + End: endTime, + Step: queryRangeStep, + }, + ) + Expect(err).ToNot(HaveOccurred()) + + memCSV := filepath.Join(testResultsDir, framework.CreateResultsFilename("csv", "memory", *plusEnabled)) + Expect(framework.WritePrometheusMatrixToCSVFile(memCSV, result)).To(Succeed()) + + memPNG := framework.CreateResultsFilename("png", "memory", *plusEnabled) + Expect( + framework.GenerateMemoryPNG(testResultsDir, memCSV, memPNG), + ).To(Succeed()) + + Expect(os.Remove(memCSV)).To(Succeed()) + + result, err = promInstance.QueryRange( + fmt.Sprintf(`rate(container_cpu_usage_seconds_total{pod="%s",container="nginx-gateway"}[2m])`, ngfPodName), + v1.Range{ + Start: startTime, + End: endTime, + Step: queryRangeStep, + }, + ) + Expect(err).ToNot(HaveOccurred()) + + cpuCSV := filepath.Join(testResultsDir, framework.CreateResultsFilename("csv", "cpu", *plusEnabled)) + Expect(framework.WritePrometheusMatrixToCSVFile(cpuCSV, result)).To(Succeed()) + + cpuPNG := framework.CreateResultsFilename("png", 
"cpu", *plusEnabled) + Expect( + framework.GenerateCPUPNG(testResultsDir, cpuCSV, cpuPNG), + ).To(Succeed()) + + Expect(os.Remove(cpuCSV)).To(Succeed()) + + reloadCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + ) + + reloadErrsCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reload_errors_total{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + ) + + reloadAvgTime := getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_total{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + )) + + reloadBuckets := getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_nginx_reloads_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsCount := getFirstValueOfVector( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsAvgTime := getFirstValueOfVector( + fmt.Sprintf( + `(nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_sum{pod="%[1]s"} @ %[2]d)`+ + ` / `+ + `(nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_count{pod="%[1]s"} @ %[2]d)`, + ngfPodName, + startTime.Unix(), + ), + ) + + eventsBuckets := getBuckets( + fmt.Sprintf( + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"}`+ + ` - `+ + `nginx_gateway_fabric_event_batch_processing_milliseconds_bucket{pod="%[1]s"} @ %d`, + ngfPodName, + startTime.Unix(), + ), + ) + + // Check container logs for errors + + ngfErrors := checkLogErrors( + "nginx-gateway", + []string{"error"}, + []string{`"logger":"usageReporter`}, // ignore usageReporter errors + filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "ngf", *plusEnabled)), + ) + nginxErrors := checkLogErrors( + "nginx", + []string{"[error]", "[emerg]", "[crit]", "[alert]"}, + nil, + filepath.Join(testResultsDir, framework.CreateResultsFilename("log", "nginx", *plusEnabled)), + ) + + // Check container restarts + + pod, err := resourceManager.GetPod(ngfNamespace, ngfPodName) + Expect(err).ToNot(HaveOccurred()) + + findRestarts := func(name string) int { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.Name == name { + return int(containerStatus.RestartCount) + } + } + Fail(fmt.Sprintf("container %s not found", name)) + return 0 + } + + ngfRestarts := findRestarts("nginx-gateway") + nginxRestarts := findRestarts("nginx") + + // Write results + + results := scaleTestResults{ + Name: testName, + ReloadCount: int(reloadCount), + ReloadErrsCount: int(reloadErrsCount), + ReloadAvgTime: int(reloadAvgTime), + ReloadBuckets: reloadBuckets, + EventsCount: int(eventsCount), + EventsAvgTime: 
int(eventsAvgTime), + EventsBuckets: eventsBuckets, + NGFErrors: ngfErrors, + NginxErrors: nginxErrors, + NGFContainerRestarts: ngfRestarts, + NginxContainerRestarts: nginxRestarts, + } + + err = writeScaleResults(outFile, results) + Expect(err).ToNot(HaveOccurred()) + } + + runScaleResources := func(objects framework.ScaleObjects, testResultsDir string, protocol string) { + ttrCsvFileName := framework.CreateResultsFilename("csv", "ttr", *plusEnabled) + ttrCsvFile, writer, err := framework.NewCSVResultsWriter(testResultsDir, ttrCsvFileName) + Expect(err).ToNot(HaveOccurred()) + defer ttrCsvFile.Close() + + Expect(resourceManager.Apply(objects.BaseObjects)).To(Succeed()) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + Expect(resourceManager.WaitForPodsToBeReady(ctx, namespace)).To(Succeed()) + + for i := 0; i < len(objects.ScaleIterationGroups); i++ { + Expect(resourceManager.Apply(objects.ScaleIterationGroups[i])).To(Succeed()) + + var url string + if protocol == "http" && portFwdPort != 0 { + url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdPort) + } else if protocol == "https" && portFwdHTTPSPort != 0 { + url = fmt.Sprintf("%s://%d.example.com:%d", protocol, i, portFwdHTTPSPort) + } else { + url = fmt.Sprintf("%s://%d.example.com", protocol, i) + } + + startCheck := time.Now() + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(30 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + ttr := time.Since(startCheck) + + seconds := ttr.Seconds() + record := []string{strconv.Itoa(i + 1), strconv.FormatFloat(seconds, 'f', -1, 64)} + + Expect(writer.Write(record)).To(Succeed()) + } + + writer.Flush() + Expect(ttrCsvFile.Close()).To(Succeed()) + + ttrPNG := framework.CreateResultsFilename("png", "ttr", *plusEnabled) + Expect( + framework.GenerateTTRPNG(testResultsDir, ttrCsvFile.Name(), ttrPNG), + ).To(Succeed()) + + Expect(os.Remove(ttrCsvFile.Name())).To(Succeed()) + } + + runScaleUpstreams := func() { + Expect(resourceManager.ApplyFromFiles(upstreamsManifests, namespace)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(namespace)).To(Succeed()) + + var url string + if portFwdPort != 0 { + url = fmt.Sprintf("http://hello.example.com:%d", portFwdPort) + } else { + url = "http://hello.example.com" + } + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + + Expect( + resourceManager.ScaleDeployment(namespace, "backend", upstreamServerCount), + ).To(Succeed()) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + Expect(resourceManager.WaitForPodsToBeReady(ctx, namespace)).To(Succeed()) + + Eventually( + createResponseChecker(url, address), + ).WithTimeout(5 * time.Second).WithPolling(100 * time.Millisecond).Should(Succeed()) + } + + setNamespace := func(objects framework.ScaleObjects) { + for _, obj := range objects.BaseObjects { + obj.SetNamespace(namespace) + } + for _, objs := range objects.ScaleIterationGroups { + for _, obj := range objs { + obj.SetNamespace(namespace) + } + } + } + + It(fmt.Sprintf("scales HTTP listeners to %d", httpListenerCount), func() { + const testName = "TestScale_Listeners" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0755)).To(Succeed()) + + objects, err := framework.GenerateScaleListenerObjects(httpListenerCount, false /*non-tls*/) + Expect(err).ToNot(HaveOccurred()) + + 
setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "http", + ) + }, + ) + }) + + It(fmt.Sprintf("scales HTTPS listeners to %d", httpsListenerCount), func() { + const testName = "TestScale_HTTPSListeners" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + objects, err := framework.GenerateScaleListenerObjects(httpsListenerCount, true /*tls*/) + Expect(err).ToNot(HaveOccurred()) + + setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "https", + ) + }, + ) + }) + + It(fmt.Sprintf("scales HTTP routes to %d", httpRouteCount), func() { + const testName = "TestScale_HTTPRoutes" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + objects, err := framework.GenerateScaleHTTPRouteObjects(httpRouteCount) + Expect(err).ToNot(HaveOccurred()) + + setNamespace(objects) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleResources( + objects, + testResultsDir, + "http", + ) + }, + ) + }) + + It(fmt.Sprintf("scales upstream servers to %d", upstreamServerCount), func() { + const testName = "TestScale_UpstreamServers" + + testResultsDir := filepath.Join(resultsDir, testName) + Expect(os.MkdirAll(testResultsDir, 0o755)).To(Succeed()) + + runTestWithMetricsAndLogs( + testName, + testResultsDir, + func() { + runScaleUpstreams() + }, + ) + }) + + It("scales HTTP matches", func() { + const testName = "TestScale_HTTPMatches" + + Expect(resourceManager.ApplyFromFiles(matchesManifests, namespace)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(namespace)).To(Succeed()) + + var port int + if portFwdPort != 0 { + port = portFwdPort + } else { + port = 80 + } + + addr := fmt.Sprintf("%s:%d", address, port) + + baseURL := "http://cafe.example.com" + + text := fmt.Sprintf("\n## Test %s\n\n", testName) + + _, err := fmt.Fprint(outFile, text) + Expect(err).ToNot(HaveOccurred()) + + run := func(t framework.Target) { + cfg := framework.LoadTestConfig{ + Targets: []framework.Target{t}, + Rate: 1000, + Duration: 30 * time.Second, + Description: "First matches", + Proxy: addr, + ServerName: "cafe.example.com", + } + _, metrics := framework.RunLoadTest(cfg) + + _, err = fmt.Fprintln(outFile, "```text") + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteMetricsResults(outFile, &metrics)).To(Succeed()) + _, err = fmt.Fprintln(outFile, "```") + Expect(err).ToNot(HaveOccurred()) + } + + run(framework.Target{ + Method: "GET", + URL: fmt.Sprintf("%s%s", baseURL, "/latte"), + Header: map[string][]string{ + "header-1": {"header-1-val"}, + }, + }) + + run(framework.Target{ + Method: "GET", + URL: fmt.Sprintf("%s%s", baseURL, "/latte"), + Header: map[string][]string{ + "header-50": {"header-50-val"}, + }, + }) + }) + + AfterEach(func() { + teardown(releaseName) + Expect(resourceManager.DeleteNamespace(namespace)).To(Succeed()) + }) + + AfterAll(func() { + close(promPortForwardStopCh) + Expect(framework.UninstallPrometheus(resourceManager)).To(Succeed()) + Expect(outFile.Close()).To(Succeed()) + + // restoring NGF shared among tests in the suite + cfg := getDefaultSetupCfg() + cfg.nfr = true + setup(cfg) + }) +}) + +type bucket struct { + Le string + Val int +} + +type scaleTestResults struct { + Name string + + ReloadCount int + ReloadErrsCount int + ReloadAvgTime 
int + ReloadBuckets []bucket + + EventsCount int + EventsAvgTime int + EventsBuckets []bucket + + NGFErrors int + NginxErrors int + + NGFContainerRestarts int + NginxContainerRestarts int +} + +const scaleResultTemplate = ` +## Test {{ .Name }} + +### Reloads + +- Total: {{ .ReloadCount }} +- Total Errors: {{ .ReloadErrsCount }} +- Average Time: {{ .ReloadAvgTime }}ms +- Reload distribution: +{{- range .ReloadBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +### Event Batch Processing + +- Total: {{ .EventsCount }} +- Average Time: {{ .EventsAvgTime }}ms +- Event Batch Processing distribution: +{{- range .EventsBuckets }} + - {{ .Le }}ms: {{ .Val }} +{{- end }} + +### Errors + +- NGF errors: {{ .NGFErrors }} +- NGF container restarts: {{ .NGFContainerRestarts }} +- NGINX errors: {{ .NginxErrors }} +- NGINX container restarts: {{ .NginxContainerRestarts }} + +### Graphs and Logs + +See [output directory](./{{ .Name }}) for more details. +The logs are attached only if there are errors. +` + +func writeScaleResults(dest io.Writer, results scaleTestResults) error { + tmpl, err := template.New("results").Parse(scaleResultTemplate) + if err != nil { + return err + } + + return tmpl.Execute(dest, results) +} diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index 7c1218d15f..0dd0b3f4bb 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -4,6 +4,7 @@ import ( "context" "embed" "flag" + "fmt" "path" "path/filepath" "runtime" @@ -64,8 +65,9 @@ var ( manifests embed.FS k8sClient client.Client resourceManager framework.ResourceManager - portForwardStopCh = make(chan struct{}, 1) + portForwardStopCh chan struct{} portFwdPort int + portFwdHTTPSPort int timeoutConfig framework.TimeoutConfig localChartPath string address string @@ -75,8 +77,10 @@ var ( ) const ( - releaseName = "ngf-test" - ngfNamespace = "nginx-gateway" + releaseName = "ngf-test" + ngfNamespace = "nginx-gateway" + ngfHTTPForwardedPort = 10080 + ngfHTTPSForwardedPort = 10443 ) type setupConfig struct { @@ -179,8 +183,12 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { Expect(podNames).ToNot(BeEmpty()) if *serviceType != "LoadBalancer" { - portFwdPort, err = framework.PortForward(k8sConfig, installCfg.Namespace, podNames[0], portForwardStopCh) + ports := []string{fmt.Sprintf("%d:80", ngfHTTPForwardedPort), fmt.Sprintf("%d:443", ngfHTTPSForwardedPort)} + portForwardStopCh = make(chan struct{}) + err = framework.PortForward(k8sConfig, installCfg.Namespace, podNames[0], ports, portForwardStopCh) address = "127.0.0.1" + portFwdPort = ngfHTTPForwardedPort + portFwdHTTPSPort = ngfHTTPSForwardedPort } else { address, err = resourceManager.GetLBIPAddress(installCfg.Namespace) } @@ -189,7 +197,9 @@ func setup(cfg setupConfig, extraInstallArgs ...string) { func teardown(relName string) { if portFwdPort != 0 { - portForwardStopCh <- struct{}{} + close(portForwardStopCh) + portFwdPort = 0 + portFwdHTTPSPort = 0 } cfg := framework.InstallationConfig{ @@ -242,15 +252,18 @@ var _ = BeforeSuite(func() { cfg.nfr = isNFR(labelFilter) // Skip deployment if: - // - running upgrade test (this test will deploy its own version) - // - running longevity teardown (deployment will already exist) - // - running telemetry test (NGF will be deployed as part of the test) - if strings.Contains(labelFilter, "upgrade") || - strings.Contains(labelFilter, "longevity-teardown") || - strings.Contains(labelFilter, "telemetry") || - strings.Contains(labelFilter, "graceful-recovery") { - - 
cfg.deploy = false + skipSubstrings := []string{ + "upgrade", // - running upgrade test (this test will deploy its own version) + "longevity-teardown", // - running longevity teardown (deployment will already exist) + "telemetry", // - running telemetry test (NGF will be deployed as part of the test) + "graceful-recovery", // - running graceful recovery test (this test will deploy its own version) + "scale", // - running scale test (this test will deploy its own version) + } + for _, s := range skipSubstrings { + if strings.Contains(labelFilter, s) { + cfg.deploy = false + break + } } // use a different release name for longevity to allow us to filter on a specific label when collecting @@ -283,5 +296,6 @@ func isNFR(labelFilter string) bool { strings.Contains(labelFilter, "longevity") || strings.Contains(labelFilter, "performance") || strings.Contains(labelFilter, "upgrade") || - strings.Contains(labelFilter, "graceful-recovery") + strings.Contains(labelFilter, "graceful-recovery") || + strings.Contains(labelFilter, "scale") } diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go index 09b3ecb8cf..e749292078 100644 --- a/tests/suite/upgrade_test.go +++ b/tests/suite/upgrade_test.go @@ -76,7 +76,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { AfterEach(func() { Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.DeleteNamespace(ns.Name)).To(Succeed()) resultsFile.Close() }) @@ -157,7 +157,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { } buf := new(bytes.Buffer) - encoder := framework.NewCSVEncoder(buf) + encoder := framework.NewVegetaCSVEncoder(buf) for _, res := range results { res := res Expect(encoder.Encode(&res)).To(Succeed()) @@ -173,8 +173,9 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { csvFile.Close() pngName := framework.CreateResultsFilename("png", scheme, *plusEnabled) - output, err := framework.GeneratePNG(resultsDir, csvName, pngName) - Expect(err).ToNot(HaveOccurred(), string(output)) + Expect( + framework.GenerateRequestsPNG(resultsDir, csvName, pngName), + ).To(Succeed()) metricsCh <- &metricsRes }(test) @@ -251,7 +252,7 @@ var _ = Describe("Upgrade testing", Label("nfr", "upgrade"), func() { _, err := fmt.Fprint(resultsFile, res.testName) Expect(err).ToNot(HaveOccurred()) - Expect(framework.WriteResults(resultsFile, res.metrics)).To(Succeed()) + Expect(framework.WriteMetricsResults(resultsFile, res.metrics)).To(Succeed()) link := fmt.Sprintf("\n\n![%[1]v.png](%[1]v.png)\n", res.scheme) if *plusEnabled {