From e64fcd6d63cb87d76e83fc7e718f387953a0ff02 Mon Sep 17 00:00:00 2001 From: Rakshit Gondwal <98955085+rakshitgondwal@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:23:03 +0530 Subject: [PATCH] feat: update prometheus api to support `range.step` (#1801) Signed-off-by: Rakshit Gondwal Signed-off-by: Florian Bacher Co-authored-by: Florian Bacher --- .../common/providers/prometheus/prometheus.go | 90 +++++++-- .../providers/prometheus/prometheus_test.go | 178 ++++++++++++++++-- 2 files changed, 236 insertions(+), 32 deletions(-) diff --git a/metrics-operator/controllers/common/providers/prometheus/prometheus.go b/metrics-operator/controllers/common/providers/prometheus/prometheus.go index 4bfb667e31..6d1da44185 100644 --- a/metrics-operator/controllers/common/providers/prometheus/prometheus.go +++ b/metrics-operator/controllers/common/providers/prometheus/prometheus.go @@ -2,6 +2,7 @@ package prometheus import ( "context" + "encoding/json" "fmt" //nolint:gci "net/http" //nolint:gci "time" @@ -13,6 +14,10 @@ import ( "github.com/prometheus/common/model" ) +var errCouldNotCast = fmt.Errorf("could not cast result") +var errNoValues = fmt.Errorf("no values in query result") +var errTooManyValues = fmt.Errorf("too many values in query result") + type KeptnPrometheusProvider struct { Log logr.Logger HttpClient http.Client @@ -37,7 +42,7 @@ func (r *KeptnPrometheusProvider) EvaluateQuery(ctx context.Context, metric metr if len(warnings) != 0 { r.Log.Info("Prometheus API returned warnings: " + warnings[0]) } - return getResultForMatrix(result, r) + return getResultForMatrix(result) } else { result, warnings, err := evaluateQueryWithoutRange(ctx, metric, r, api) if err != nil { @@ -46,10 +51,31 @@ func (r *KeptnPrometheusProvider) EvaluateQuery(ctx context.Context, metric metr if len(warnings) != 0 { r.Log.Info("Prometheus API returned warnings: " + warnings[0]) } - return getResultForVector(result, r) + return getResultForVector(result) } } +// EvaluateQueryForStep fetches the metric values from prometheus provider +func (r *KeptnPrometheusProvider) EvaluateQueryForStep(ctx context.Context, metric metricsapi.KeptnMetric, provider metricsapi.KeptnMetricsProvider) ([]string, []byte, error) { + ctx, cancel := context.WithTimeout(ctx, 20*time.Second) + defer cancel() + + client, err := promapi.NewClient(promapi.Config{Address: provider.Spec.TargetServer, Client: &r.HttpClient}) + if err != nil { + return nil, nil, err + } + + api := prometheus.NewAPI(client) + result, warnings, err := evaluateQueryWithRange(ctx, metric, r, api) + if err != nil { + return nil, nil, err + } + if len(warnings) != 0 { + r.Log.Info("Prometheus API returned warnings: " + warnings[0]) + } + return getResultForStepMatrix(result) +} + func evaluateQueryWithRange(ctx context.Context, metric metricsapi.KeptnMetric, r *KeptnPrometheusProvider, api prometheus.API) (model.Value, prometheus.Warnings, error) { queryTime := time.Now().UTC() // Get the duration @@ -57,18 +83,28 @@ func evaluateQueryWithRange(ctx context.Context, metric metricsapi.KeptnMetric, if err != nil { return nil, nil, err } + var stepInterval time.Duration + if metric.Spec.Range.Step != "" { + stepTime := metric.Spec.Range.Step + stepInterval, err = time.ParseDuration(stepTime) + if err != nil { + return nil, nil, err + } + } else { + stepInterval = queryInterval + } // Convert type Duration to type Time startTime := queryTime.Add(-queryInterval).UTC() r.Log.Info(fmt.Sprintf( "Running query: /api/v1/query_range?query=%s&start=%d&end=%d&step=%v", metric.Spec.Query, startTime.Unix(), queryTime.Unix(), - queryInterval, + stepInterval, )) queryRange := prometheus.Range{ Start: startTime, End: queryTime, - Step: queryInterval, + Step: stepInterval, } result, warnings, err := api.QueryRange( ctx, @@ -101,11 +137,11 @@ func evaluateQueryWithoutRange(ctx context.Context, metric metricsapi.KeptnMetri return result, warnings, nil } -func getResultForMatrix(result model.Value, r *KeptnPrometheusProvider) (string, []byte, error) { +func getResultForMatrix(result model.Value) (string, []byte, error) { // check if we can cast the result to a matrix resultMatrix, ok := result.(model.Matrix) if !ok { - return "", nil, fmt.Errorf("could not cast result") + return "", nil, errCouldNotCast } // We are only allowed to return one value, if not the query may be malformed // we are using two different errors to give the user more information about the result @@ -113,11 +149,9 @@ func getResultForMatrix(result model.Value, r *KeptnPrometheusProvider) (string, // parameter as the interval itself, hence there can only be one value. // This logic should be changed, once we work onto the aggregation functions. if len(resultMatrix) == 0 { - r.Log.Info("No values in query result") - return "", nil, fmt.Errorf("no values in query result") + return "", nil, errNoValues } else if len(resultMatrix) > 1 { - r.Log.Info("Too many values in the query result") - return "", nil, fmt.Errorf("too many values in the query result") + return "", nil, errTooManyValues } value := resultMatrix[0].Values[0].Value.String() b, err := resultMatrix[0].Values[0].Value.MarshalJSON() @@ -127,20 +161,18 @@ func getResultForMatrix(result model.Value, r *KeptnPrometheusProvider) (string, return value, b, nil } -func getResultForVector(result model.Value, r *KeptnPrometheusProvider) (string, []byte, error) { +func getResultForVector(result model.Value) (string, []byte, error) { // check if we can cast the result to a vector resultVector, ok := result.(model.Vector) if !ok { - return "", nil, fmt.Errorf("could not cast result") + return "", nil, errCouldNotCast } // We are only allowed to return one value, if not the query may be malformed // we are using two different errors to give the user more information about the result if len(resultVector) == 0 { - r.Log.Info("No values in query result") - return "", nil, fmt.Errorf("no values in query result") + return "", nil, errNoValues } else if len(resultVector) > 1 { - r.Log.Info("Too many values in the query result") - return "", nil, fmt.Errorf("too many values in the query result") + return "", nil, errTooManyValues } value := resultVector[0].Value.String() b, err := resultVector[0].Value.MarshalJSON() @@ -149,3 +181,29 @@ func getResultForVector(result model.Value, r *KeptnPrometheusProvider) (string, } return value, b, nil } + +func getResultForStepMatrix(result model.Value) ([]string, []byte, error) { + // check if we can cast the result to a matrix + resultMatrix, ok := result.(model.Matrix) + if !ok { + return nil, nil, errCouldNotCast + } + + if len(resultMatrix) == 0 { + return nil, nil, errNoValues + } else if len(resultMatrix) > 1 { + return nil, nil, errTooManyValues + } + + resultSlice := make([]string, len(resultMatrix[0].Values)) + for i, value := range resultMatrix[0].Values { + resultSlice[i] = value.Value.String() + } + + b, err := json.Marshal(resultSlice) + if err != nil { + return nil, nil, err + } + + return resultSlice, b, nil +} diff --git a/metrics-operator/controllers/common/providers/prometheus/prometheus_test.go b/metrics-operator/controllers/common/providers/prometheus/prometheus_test.go index cd5e6731cc..a18cb74d64 100644 --- a/metrics-operator/controllers/common/providers/prometheus/prometheus_test.go +++ b/metrics-operator/controllers/common/providers/prometheus/prometheus_test.go @@ -7,6 +7,7 @@ import ( "testing" metricsapi "github.com/keptn/lifecycle-toolkit/metrics-operator/api/v1alpha3" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" ctrl "sigs.k8s.io/controller-runtime" @@ -24,14 +25,22 @@ const promEmptyDataPayloadWithRange = "{\"status\":\"success\",\"data\":{\"resul const promVectorPayloadWithRange = "{\"status\":\"success\",\"data\":{\"resultType\":\"vector\",\"result\":[[]]}}" const promMultiPointPayloadWithRange = "{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"]]},{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"]]}]}}" +const promWarnPayloadWithRangeAndStep = "{\"status\":\"success\",\"warnings\":[\"awarning\"],\"data\":{\"resultType\":\"matrix\",\"result\":[{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"]]}]}}" +const promPayloadWithRangeAndStep = "{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"]]}]}}" +const promEmptyDataPayloadWithRangeAndStep = "{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[[]]}}" +const promVectorPayloadWithRangeAndStep = "{\"status\":\"success\",\"data\":{\"resultType\":\"vector\",\"result\":[[]]}}" +const promMultiPointPayloadWithRangeAndStep = "{\"status\":\"success\",\"data\":{\"resultType\":\"matrix\",\"result\":[{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"]]},{\"metric\":{\"__name__\":\"kube_pod_info\",\"container\":\"kube-rbac-proxy-main\",\"created_by_kind\":\"DaemonSet\",\"created_by_name\":\"kindnet\",\"host_ip\":\"172.18.0.2\",\"host_network\":\"true\",\"instance\":\"10.244.0.24:8443\",\"job\":\"kube-state-metrics\",\"namespace\":\"kube-system\",\"node\":\"kind-control-plane\",\"pod\":\"kindnet-llt85\",\"pod_ip\":\"172.18.0.2\",\"uid\":\"0bb9d9db-2658-439f-aed9-ab3e8502397d\"},\"values\":[[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"],[1669714193.275,\"1\"]]}]}}" + func Test_prometheus(t *testing.T) { tests := []struct { - name string - in string - out string - outraw []byte - wantError bool - hasRange bool + name string + in string + out string + outForStep []string + outraw []byte + wantError bool + hasRange bool + hasStep bool }{ { name: "wrong data with no range", @@ -83,6 +92,7 @@ func Test_prometheus(t *testing.T) { outraw: []byte("\"1\""), wantError: false, hasRange: true, + hasStep: false, }, { name: "multiple datapoint with range", @@ -90,6 +100,7 @@ func Test_prometheus(t *testing.T) { out: "", wantError: true, hasRange: true, + hasStep: false, }, { name: "empty datapoint with range", @@ -97,6 +108,7 @@ func Test_prometheus(t *testing.T) { out: "", wantError: true, hasRange: true, + hasStep: false, }, { name: "unsupported answer type with range", @@ -104,6 +116,7 @@ func Test_prometheus(t *testing.T) { out: "", wantError: true, hasRange: false, + hasStep: false, }, { name: "happy path with range", @@ -112,6 +125,49 @@ func Test_prometheus(t *testing.T) { outraw: []byte("\"1\""), wantError: false, hasRange: true, + hasStep: false, + }, + { + name: "warnings with range and step", + in: promWarnPayloadWithRangeAndStep, + outForStep: []string{"1", "1", "1", "1", "1"}, + outraw: []byte("[\"1\",\"1\",\"1\",\"1\",\"1\"]"), + wantError: false, + hasRange: true, + hasStep: true, + }, + { + name: "multiple datapoint with range and step", + in: promMultiPointPayloadWithRangeAndStep, + outForStep: nil, + wantError: true, + hasRange: true, + hasStep: true, + }, + { + name: "empty datapoint with range and step", + in: promEmptyDataPayloadWithRangeAndStep, + outForStep: nil, + wantError: true, + hasRange: true, + hasStep: true, + }, + { + name: "unsupported answer type with range and step", + in: promVectorPayloadWithRangeAndStep, + outForStep: nil, + wantError: true, + hasRange: true, + hasStep: true, + }, + { + name: "happy path with range and step", + in: promPayloadWithRangeAndStep, + outForStep: []string{"1", "1", "1", "1", "1"}, + outraw: []byte("[\"1\",\"1\",\"1\",\"1\",\"1\"]"), + wantError: false, + hasRange: true, + hasStep: true, }, } @@ -152,18 +208,108 @@ func Test_prometheus(t *testing.T) { t.Errorf("want error: %t, got: %v", tt.wantError, e) } case true: - obj := metricsapi.KeptnMetric{ - Spec: metricsapi.KeptnMetricSpec{ - Query: "my-query", - Range: &metricsapi.RangeSpec{Interval: "5m"}, - }, + if tt.hasStep { + obj := metricsapi.KeptnMetric{ + Spec: metricsapi.KeptnMetricSpec{ + Query: "my-query", + Range: &metricsapi.RangeSpec{ + Interval: "5m", + Step: "1m", + Aggregation: "max", + }, + }, + } + r, raw, e := kpp.EvaluateQueryForStep(context.TODO(), obj, p) + require.Equal(t, tt.outForStep, r) + require.Equal(t, tt.outraw, raw) + if tt.wantError != (e != nil) { + t.Errorf("want error: %t, got: %v", tt.wantError, e) + } + } else { + obj := metricsapi.KeptnMetric{ + Spec: metricsapi.KeptnMetricSpec{ + Query: "my-query", + Range: &metricsapi.RangeSpec{Interval: "5m"}, + }, + } + r, raw, e := kpp.EvaluateQuery(context.TODO(), obj, p) + require.Equal(t, tt.out, r) + require.Equal(t, tt.outraw, raw) + if tt.wantError != (e != nil) { + t.Errorf("want error: %t, got: %v", tt.wantError, e) + } } - r, raw, e := kpp.EvaluateQuery(context.TODO(), obj, p) - require.Equal(t, tt.out, r) - require.Equal(t, tt.outraw, raw) - if tt.wantError != (e != nil) { - t.Errorf("want error: %t, got: %v", tt.wantError, e) + } + }) + } +} + +func Test_resultsForMatrix(t *testing.T) { + tests := []struct { + name string + result model.Value + wantResultSlice []string + wantResultString string + wantRaw []byte + wantErr bool + hasStep bool + }{ + // this is to cover the scenario where we get an empty result matrix from the prometheus API + // right now, the prometheus client returns an error in the QueryRange function if that is the case, + // but we should do a check for an empty matrix here as well in case the behavior of the QueryRange function + // changes + { + name: "empty matrix with step - return err", + result: model.Matrix{}, + wantResultSlice: nil, + wantRaw: nil, + wantErr: true, + hasStep: true, + }, + { + name: "empty matrix without step- return err", + result: model.Matrix{}, + wantResultString: "", + wantRaw: nil, + wantErr: true, + hasStep: false, + }, + { + name: "unsupported matrix with step- return err", + result: model.Vector{}, + wantResultString: "", + wantRaw: nil, + wantErr: true, + hasStep: true, + }, + { + name: "unsupported matrix without step- return err", + result: model.Vector{}, + wantResultString: "", + wantRaw: nil, + wantErr: true, + hasStep: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + switch tt.hasStep { + case true: + resultSlice, raw, err := getResultForStepMatrix(tt.result) + if (err != nil) != tt.wantErr { + t.Errorf("getResultForStepMatrix() error = %v, wantErr %v", err, tt.wantErr) + return + } + require.Equal(t, tt.wantResultSlice, resultSlice) + require.Equal(t, tt.wantRaw, raw) + case false: + resultString, raw, err := getResultForMatrix(tt.result) + if (err != nil) != tt.wantErr { + t.Errorf("getResultForMatrix() error = %v, wantErr %v", err, tt.wantErr) + return } + require.Equal(t, tt.wantResultString, resultString) + require.Equal(t, tt.wantRaw, raw) } }) }