From 37d35ba223cc1dec8517fb605b473c829f7bcce4 Mon Sep 17 00:00:00 2001 From: zirain Date: Sat, 2 Mar 2024 10:45:21 +0800 Subject: [PATCH] e2e: add test for BackendTrafficPolicy retry (#2738) * e2e: add test for BackendTrafficPolicy retry Signed-off-by: zirain * lint Signed-off-by: zirain --------- Signed-off-by: zirain Co-authored-by: Xunzhuo --- examples/prometheus/helm-values.yaml | 25 +++++++ go.sum | 4 ++ test/e2e/testdata/retry.yaml | 41 +++++++++++ test/e2e/tests/retry.go | 89 ++++++++++++++++++++++++ test/e2e/utils/prometheus/prometheus.go | 92 +++++++++++++++++++++++++ tools/make/kube.mk | 9 ++- 6 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 examples/prometheus/helm-values.yaml create mode 100644 test/e2e/testdata/retry.yaml create mode 100644 test/e2e/tests/retry.go create mode 100644 test/e2e/utils/prometheus/prometheus.go diff --git a/examples/prometheus/helm-values.yaml b/examples/prometheus/helm-values.yaml new file mode 100644 index 00000000000..9de08834491 --- /dev/null +++ b/examples/prometheus/helm-values.yaml @@ -0,0 +1,25 @@ +# To simplify the deployment, disable non-essential components +alertmanager: + enabled: false +prometheus-pushgateway: + enabled: false +kube-state-metrics: + enabled: false +prometheus-node-exporter: + enabled: false +server: + fullnameOverride: prometheus + persistentVolume: + enabled: false + readinessProbeInitialDelay: 0 + global: + # Speed up scraping a bit from the default + scrape_interval: 15s + service: + # use LoadBalancer to expose prometheus + type: LoadBalancer + # use dockerhub + image: + repository: prom/prometheus + securityContext: null + diff --git a/go.sum b/go.sum index 3e6ac5c9f8f..36664d62727 100644 --- a/go.sum +++ b/go.sum @@ -286,6 +286,8 @@ github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJS github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/josharian/intern v1.0.0 
h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.8/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= @@ -354,6 +356,8 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= diff --git a/test/e2e/testdata/retry.yaml b/test/e2e/testdata/retry.yaml new file mode 100644 index 00000000000..bacb78b1d60 --- /dev/null +++ b/test/e2e/testdata/retry.yaml @@ -0,0 +1,41 @@ +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: BackendTrafficPolicy +metadata: + name: retry-policy + namespace: gateway-conformance-infra +spec: + targetRef: + group: gateway.networking.k8s.io + kind: HTTPRoute + 
name: retry-route
+    namespace: gateway-conformance-infra
+  retry:
+    numRetries: 5
+    perRetry:
+      backOff:
+        baseInterval: 100ms
+        maxInterval: 10s
+      timeout: 250ms
+    retryOn:
+      httpStatusCodes:
+        - 500
+      triggers:
+        - connect-failure
+        - retriable-status-codes
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: retry-route
+  namespace: gateway-conformance-infra
+spec:
+  parentRefs:
+  - name: same-namespace
+  rules:
+  - matches:
+    - path:
+        type: PathPrefix
+        value: /status
+    backendRefs:
+    - name: infra-backend-v1
+      port: 8080
diff --git a/test/e2e/tests/retry.go b/test/e2e/tests/retry.go
new file mode 100644
index 00000000000..c2acfecf34c
--- /dev/null
+++ b/test/e2e/tests/retry.go
@@ -0,0 +1,99 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+//go:build e2e
+// +build e2e
+
+package tests
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/gateway-api/conformance/utils/http"
+	"sigs.k8s.io/gateway-api/conformance/utils/kubernetes"
+	"sigs.k8s.io/gateway-api/conformance/utils/suite"
+
+	"github.com/envoyproxy/gateway/test/e2e/utils/prometheus"
+)
+
+// retryPolicyNumRetries mirrors spec.retry.numRetries of the
+// BackendTrafficPolicy declared in testdata/retry.yaml; keep them in sync.
+const retryPolicyNumRetries = 5
+
+func init() {
+	ConformanceTests = append(ConformanceTests, RetryTest)
+}
+
+// RetryTest sends one request that is expected to be answered with a 500 and
+// then checks, through the Envoy retry counter queried from Prometheus, that
+// the retry counter of the route's cluster increased accordingly.
+var RetryTest = suite.ConformanceTest{
+	ShortName:   "Retry",
+	Description: "Test that the BackendTrafficPolicy API implementation supports retry",
+	Manifests:   []string{"testdata/retry.yaml"},
+	Test: func(t *testing.T, suite *suite.ConformanceTestSuite) {
+		t.Run("retry-on-500", func(t *testing.T) {
+			ns := "gateway-conformance-infra"
+			routeNN := types.NamespacedName{Name: "retry-route", Namespace: ns}
+			gwNN := types.NamespacedName{Name: "same-namespace", Namespace: ns}
+			gwAddr := kubernetes.GatewayAndHTTPRoutesMustBeAccepted(t, suite.Client, suite.TimeoutConfig, suite.ControllerName, kubernetes.NewGatewayRef(gwNN), routeNN)
+
+			expectedResponse := http.ExpectedResponse{
+				Request: http.Request{
+					Path: "/status/500",
+				},
+				Response: http.Response{
+					StatusCode: 500,
+				},
+				Namespace: ns,
+			}
+
+			promAddr, err := prometheus.Address(suite.Client, types.NamespacedName{Name: "prometheus", Namespace: "monitoring"})
+			require.NoError(t, err)
+			promQL := fmt.Sprintf(`envoy_cluster_upstream_rq_retry{envoy_cluster_name="httproute/%s/%s/rule/0"}`, routeNN.Namespace, routeNN.Name)
+
+			// Baseline of the retry counter. QuerySum fails while the series is
+			// missing or still zero, so an error here is read as "no retries yet".
+			before := float64(0)
+			if v, err := prometheus.QuerySum(promAddr, promQL); err == nil {
+				before = v
+			}
+			t.Logf("query count %s before: %v", promQL, before)
+
+			req := http.MakeRequest(t, &expectedResponse, gwAddr, "HTTP", "http")
+			cReq, cResp, err := suite.RoundTripper.CaptureRoundTrip(req)
+			if err != nil {
+				// Stop here: after a failed round trip cReq and cResp are presumably
+				// nil and must not be handed to CompareRequest.
+				t.Fatalf("failed to get expected response: %v", err)
+			}
+
+			if err := http.CompareRequest(t, &req, cReq, cResp, expectedResponse); err != nil {
+				t.Errorf("failed to compare request and response: %v", err)
+			}
+
+			http.AwaitConvergence(t,
+				suite.TimeoutConfig.RequiredConsecutiveSuccesses,
+				suite.TimeoutConfig.MaxTimeToConsistency,
+				func(_ time.Duration) bool {
+					// check retry stats from Prometheus
+					v, err := prometheus.QuerySum(promAddr, promQL)
+					if err != nil {
+						return false
+					}
+					t.Logf("query count %s after: %v", promQL, v)
+
+					// numRetries is 5 in testdata/retry.yaml, so the counter is
+					// expected to have grown by a positive multiple of it.
+					delta := int64(v - before)
+					return delta > 0 && delta%retryPolicyNumRetries == 0
+				})
+		})
+	},
+}
diff --git a/test/e2e/utils/prometheus/prometheus.go b/test/e2e/utils/prometheus/prometheus.go
new file mode 100644
index 00000000000..641f802609f
--- /dev/null
+++ b/test/e2e/utils/prometheus/prometheus.go
@@ -0,0 +1,122 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+//go:build e2e
+// +build e2e
+
+// Package prometheus contains helpers the e2e tests use to locate the
+// Prometheus server and to read metric values from it.
+package prometheus
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	prom "github.com/prometheus/client_golang/api"
+	promapiv1 "github.com/prometheus/client_golang/api/prometheus/v1"
+	"github.com/prometheus/common/model"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// queryTimeout bounds a single instant query so that an unreachable
+// Prometheus fails the query instead of blocking the test.
+const queryTimeout = 10 * time.Second
+
+// Address returns the base URL ("http://<host>") of the Service nn. The host
+// is the first load balancer ingress IP or, when no entry carries an IP, the
+// first ingress hostname. An error is returned when the Service cannot be
+// read or exposes neither.
+func Address(c client.Client, nn types.NamespacedName) (string, error) {
+	svc := &corev1.Service{}
+	if err := c.Get(context.TODO(), nn, svc); err != nil {
+		return "", fmt.Errorf("failed to get service: %w", err)
+	}
+
+	hostname := ""
+	for _, ing := range svc.Status.LoadBalancer.Ingress {
+		if ing.IP != "" {
+			return fmt.Sprintf("http://%s", ing.IP), nil
+		}
+		if hostname == "" {
+			hostname = ing.Hostname
+		}
+	}
+	// Some load balancers publish only a DNS name, so fall back to it.
+	if hostname != "" {
+		return fmt.Sprintf("http://%s", hostname), nil
+	}
+
+	return "", fmt.Errorf("no ingress found for service %s", nn)
+}
+
+// RawQuery runs promQL as an instant query, evaluated at the current time,
+// against the Prometheus server at address. Scalar and string results are
+// returned unchanged; a vector result must hold at least one sample,
+// otherwise an error is returned. Any other result type is rejected.
+func RawQuery(address string, promQL string) (model.Value, error) {
+	c, err := prom.NewClient(prom.Config{Address: address})
+	if err != nil {
+		return nil, err
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), queryTimeout)
+	defer cancel()
+
+	v, _, err := promapiv1.NewAPI(c).Query(ctx, promQL, time.Now())
+	if err != nil {
+		return nil, err
+	}
+
+	switch v.Type() {
+	case model.ValScalar, model.ValString:
+		return v, nil
+	case model.ValVector:
+		vec, ok := v.(model.Vector)
+		if !ok || len(vec) == 0 {
+			return nil, fmt.Errorf("value not found (query: %v)", promQL)
+		}
+		return v, nil
+	default:
+		return nil, fmt.Errorf("unhandled value type: %v", v.Type())
+	}
+}
+
+// QuerySum evaluates promQL through RawQuery and returns the sum of all
+// samples of the resulting vector. It returns an error when the query fails,
+// when the result is not a vector, or when the sum is not greater than zero,
+// so a metric that is still at zero is reported like a missing one.
+func QuerySum(address string, promQL string) (float64, error) {
+	val, err := RawQuery(address, promQL)
+	if err != nil {
+		return 0, err
+	}
+	got, err := sum(val)
+	if err != nil {
+		return 0, fmt.Errorf("could not find metric value: %w", err)
+	}
+	return got, nil
+}
+
+// sum adds up the sample values of a vector result. A total that is not
+// strictly positive is returned as a "value not found" error.
+func sum(val model.Value) (float64, error) {
+	vec, ok := val.(model.Vector)
+	if !ok {
+		return 0, fmt.Errorf("value not a model.Vector; was %s", val.Type().String())
+	}
+
+	total := 0.0
+	for _, sample := range vec {
+		total += float64(sample.Value)
+	}
+
+	if total > 0.0 {
+		return total, nil
+	}
+	return 0, fmt.Errorf("value not found")
+}
diff --git a/tools/make/kube.mk b/tools/make/kube.mk
index 408717618c0..d5bebb1f94d 100644
--- a/tools/make/kube.mk
+++ b/tools/make/kube.mk
@@ -125,12 +125,13 @@ else
 endif
 
 .PHONY: prepare-e2e
-prepare-e2e: prepare-helm-repo install-fluent-bit install-loki install-tempo install-otel-collector
+prepare-e2e: prepare-helm-repo install-fluent-bit install-loki install-tempo install-otel-collector install-prometheus
 	@$(LOG_TARGET)
 	kubectl rollout status daemonset fluent-bit -n monitoring --timeout 5m
 	kubectl rollout status statefulset loki -n monitoring --timeout 5m
 	kubectl rollout status statefulset tempo -n monitoring --timeout 5m
 	kubectl rollout status deployment otel-collector -n monitoring --timeout 5m
+	kubectl rollout status deployment prometheus -n monitoring --timeout 5m
 
 .PHONY: prepare-helm-repo
 prepare-helm-repo:
@@ -138,6 +139,7 @@ prepare-helm-repo:
 	helm repo add fluent https://fluent.github.io/helm-charts
 	helm repo add grafana https://grafana.github.io/helm-charts
 	helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
 	helm repo update
 
 .PHONY: install-fluent-bit
@@ -155,6 +157,11 @@ install-tempo:
 	@$(LOG_TARGET)
 	helm upgrade --install tempo grafana/tempo -f examples/tempo/helm-values.yaml -n monitoring --create-namespace --version $(TEMPO_CHART_VERSION)
 
+.PHONY: install-prometheus
+install-prometheus:
+	@$(LOG_TARGET)
+	helm upgrade --install prometheus prometheus-community/prometheus -f examples/prometheus/helm-values.yaml -n monitoring --create-namespace
+
 .PHONY: install-otel-collector
 install-otel-collector:
 	@$(LOG_TARGET)