Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(operator): Introduce Prometheus evaluation #183

Merged
merged 6 commits into from
Oct 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions operator/api/v1alpha1/keptnevaluation_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ type KeptnEvaluationStatus struct {
// EvaluationStatusItem holds the outcome of evaluating a single objective.
type EvaluationStatusItem struct {
// Value is the string form of the sample returned by the query; it stays
// empty when no usable sample was obtained.
Value string `json:"value"`
// Status is the resulting state of the objective's evaluation.
Status common.KeptnState `json:"status"`
// Error carries the error or warning message produced while querying or
// checking the value; it is omitted from the JSON output when empty.
Error string `json:"error,omitempty"`
odubajDT marked this conversation as resolved.
Show resolved Hide resolved
}

//+kubebuilder:object:root=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ spec:
evaluationStatus:
additionalProperties:
properties:
error:
type: string
status:
type: string
value:
Expand Down
2 changes: 1 addition & 1 deletion operator/config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ kind: Kustomization
images:
- name: controller
newName: docker.io/annadreal/keptn-lifecycle-operator
newTag: "202210171665998375"
newTag: "202210171665999134"
2 changes: 1 addition & 1 deletion operator/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ spec:
- name: OTEL_COLLECTOR_URL
value: otel-collector:4317
- name: FUNCTION_RUNNER_IMAGE
value: ghcr.io/keptn-sandbox/functions-runtime:v0.2.0 #x-release-please-version
value: docker.io/keptn-sandbox/functions-runtime:0.2.0 #x-release-please-version
RealAnna marked this conversation as resolved.
Show resolved Hide resolved
RealAnna marked this conversation as resolved.
Show resolved Hide resolved
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ metadata:
spec:
source: prometheus
objectives:
- name: query-1 #string
query: "xxxx" #string: promQL query
evaluationTarget: <20 #string: can only be starting with < or >
- name: query-2
query: "yyyy"
evaluationTarget: >4
- name: prometheus
query: "sum(prometheus_engine_query_duration_seconds_count)"
evaluationTarget: ">1000" #string: must start with < or >
- name: prometheus2
query: "sum(prometheus_engine_query_duration_seconds_count)"
evaluationTarget: "<1000" #string: must start with < or >

124 changes: 97 additions & 27 deletions operator/controllers/keptnevaluation/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@ import (
"fmt"
"time"

"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"math"
"net/http"
"strconv"

promapi "github.com/prometheus/client_golang/api"
prometheus "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/propagation"
Expand All @@ -33,7 +37,9 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/predicate"

"github.com/go-logr/logr"
klcv1alpha1 "github.com/keptn-sandbox/lifecycle-controller/operator/api/v1alpha1"
Expand Down Expand Up @@ -96,11 +102,10 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ

if evaluation.Status.RetryCount >= evaluation.Spec.Retries {
r.recordEvent("Warning", evaluation, "ReconcileTimeOut", "retryCount exceeded")
err := fmt.Errorf("RetryCount for evaluation exceeded")
err := fmt.Errorf("retryCount for evaluation exceeded")
span.SetStatus(codes.Error, err.Error())
evaluation.Status.OverallStatus = common.StateFailed
r.updateFinishedEvaluationMetrics(ctx, evaluation, span)

return ctrl.Result{}, err
}

Expand All @@ -111,10 +116,11 @@ func (r *KeptnEvaluationReconciler) Reconcile(ctx context.Context, req ctrl.Requ
}
evaluationDefinition, evaluationProvider, err := r.fetchDefinitionAndProvider(ctx, namespacedDefinition)
if err != nil {
return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil
}

if evaluationDefinition == nil || evaluationProvider == nil {
if errors.IsNotFound(err) {
r.Log.Info(err.Error() + ", ignoring error since object must be deleted")
return ctrl.Result{Requeue: true, RequeueAfter: 30 * time.Second}, nil
}
r.Log.Error(err, "Failed to retrieve a resource")
thschue marked this conversation as resolved.
Show resolved Hide resolved
return ctrl.Result{}, nil
}

Expand Down Expand Up @@ -213,12 +219,6 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
evaluationDefinition := &klcv1alpha1.KeptnEvaluationDefinition{}

if err := r.Client.Get(ctx, namespacedDefinition, evaluationDefinition); err != nil {
if errors.IsNotFound(err) {
// taking down all associated K8s resources is handled by K8s
r.Log.Info("KeptnEvaluationDefinition resource not found. Ignoring since object must be deleted")
return nil, nil, nil
}
r.Log.Error(err, "Failed to get the KeptnEvaluationDefinition")
return nil, nil, err
}

Expand All @@ -230,12 +230,6 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
evaluationProvider := &klcv1alpha1.KeptnEvaluationProvider{}

if err := r.Client.Get(ctx, namespacedProvider, evaluationProvider); err != nil {
if errors.IsNotFound(err) {
// taking down all associated K8s resources is handled by K8s
r.Log.Info("KeptnEvaluationProvider resource not found. Ignoring since object must be deleted")
return nil, nil, nil
}
r.Log.Error(err, "Failed to get the KeptnEvaluationProvider")
return nil, nil, err
}

Expand All @@ -245,19 +239,95 @@ func (r *KeptnEvaluationReconciler) fetchDefinitionAndProvider(ctx context.Conte
// queryEvaluation runs the objective's PromQL query against the Prometheus
// server configured in provider.Spec.TargetServer and compares the single
// returned sample with the objective's evaluation target.
//
// The returned item starts out as failed and is only marked as succeeded when
// the query yields exactly one sample and checkValue reports that the target
// is met. Problems along the way are reported through the item's Error field
// instead of a Go error.
func (r *KeptnEvaluationReconciler) queryEvaluation(objective klcv1alpha1.Objective, provider klcv1alpha1.KeptnEvaluationProvider) *klcv1alpha1.EvaluationStatusItem {
	query := &klcv1alpha1.EvaluationStatusItem{
		Value:  "",
		Status: common.StateFailed, // failed by default; only a successful check flips it
	}

	queryTime := time.Now().UTC()
	r.Log.Info("Running query: /api/v1/query?query=" + objective.Query + "&time=" + queryTime.String())

	// Bound the HTTP round trip so an unresponsive server cannot block the
	// caller indefinitely.
	httpClient := &http.Client{Timeout: 30 * time.Second}
	client, err := promapi.NewClient(promapi.Config{Address: provider.Spec.TargetServer, Client: httpClient})
	if err != nil {
		// Typically an unparsable target server address.
		query.Error = err.Error()
		return query
	}

	api := prometheus.NewAPI(client)
	result, w, err := api.Query(context.Background(), objective.Query, queryTime)
	if err != nil {
		query.Error = err.Error()
		return query
	}

	// Warnings do not abort the evaluation; the first one is surfaced to the user.
	if len(w) != 0 {
		query.Error = w[0]
		r.Log.Info("Prometheus API returned warnings: " + w[0])
	}

	// check if we can cast the result to a vector, it might be another data struct which we can't process
	resultVector, ok := result.(model.Vector)
	if !ok {
		query.Error = "could not cast result"
		return query
	}

	// We are only allowed to return one value, if not the query may be malformed
	// we are using two different errors to give the user more information about the result
	switch {
	case len(resultVector) == 0:
		r.Log.Info("No values in query result")
		query.Error = "No values in query result"
		return query
	case len(resultVector) > 1:
		r.Log.Info("Too many values in the query result")
		query.Error = "Too many values in the query result"
		return query
	}

	// Keep the single sample as a string and compare it with the evaluation target.
	query.Value = resultVector[0].Value.String()

	check, err := r.checkValue(objective, query)
	if err != nil {
		query.Error = err.Error()
		r.Log.Error(err, "Could not check query result")
	}
	if check {
		query.Status = common.StateSucceeded
	}
	return query
}

// checkValue reports whether the value stored in query satisfies the
// objective's evaluation target.
//
// The target must consist of a one-character comparison operator ("<" or ">")
// immediately followed by a number, e.g. ">1000". An error is returned when
// either the value or the target is empty, when one of them cannot be parsed
// as a float, when one of them is NaN, or when the operator is not supported.
func (r *KeptnEvaluationReconciler) checkValue(objective klcv1alpha1.Objective, query *klcv1alpha1.EvaluationStatusItem) (bool, error) {
	if len(query.Value) == 0 || len(objective.EvaluationTarget) == 0 {
		return false, fmt.Errorf("no values")
	}

	sign := objective.EvaluationTarget[:1]
	eval := objective.EvaluationTarget[1:]

	resultValue, err := strconv.ParseFloat(query.Value, 64)
	if err != nil {
		return false, fmt.Errorf("parsing query result %q: %w", query.Value, err)
	}
	// ParseFloat accepts "NaN" without an error, so it has to be rejected
	// explicitly; otherwise the check would fail without any explanation.
	if math.IsNaN(resultValue) {
		return false, fmt.Errorf("query result %q is not a number", query.Value)
	}

	compareValue, err := strconv.ParseFloat(eval, 64)
	if err != nil {
		return false, fmt.Errorf("parsing evaluation target %q: %w", objective.EvaluationTarget, err)
	}
	if math.IsNaN(compareValue) {
		return false, fmt.Errorf("evaluation target %q is not a number", objective.EvaluationTarget)
	}

	// choose comparator
	switch sign {
	case ">":
		return resultValue > compareValue, nil
	case "<":
		return resultValue < compareValue, nil
	default:
		return false, fmt.Errorf("invalid operator")
	}
}

// recordEvent publishes an event of the given type for the evaluation through
// the reconciler's recorder. shortReason is used as the event reason, and the
// message is longReason followed by the evaluation's namespace, name and
// workload version.
func (r *KeptnEvaluationReconciler) recordEvent(eventType string, evaluation *klcv1alpha1.KeptnEvaluation, shortReason string, longReason string) {
	message := fmt.Sprintf(
		"%s / Namespace: %s, Name: %s, WorkloadVersion: %s ",
		longReason,
		evaluation.Namespace,
		evaluation.Name,
		evaluation.Spec.WorkloadVersion,
	)
	r.Recorder.Event(evaluation, eventType, shortReason, message)
}
2 changes: 1 addition & 1 deletion operator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/onsi/ginkgo v1.16.5
github.com/onsi/gomega v1.18.1
github.com/prometheus/client_golang v1.13.0
github.com/prometheus/common v0.37.0
github.com/stretchr/testify v1.7.1
go.opentelemetry.io/otel v1.10.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0
Expand Down Expand Up @@ -67,7 +68,6 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions operator/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
Expand Down Expand Up @@ -376,6 +377,7 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
Expand Down