From ab32f2da956094baf794ae09f75ccfca48ff018b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Sevilla?= Date: Tue, 28 Jun 2022 16:08:29 +0200 Subject: [PATCH] Elapsed magic variable (#178) * Elapsed magic variable Signed-off-by: Raul Sevilla * Add top2PrometheusCPU to test Signed-off-by: Raul Sevilla * Increase test duration Signed-off-by: Raul Sevilla * Fix query and update number of objects in run.sh Signed-off-by: Raul Sevilla --- docs/metrics.md | 10 ++++++++++ pkg/burner/namespaces.go | 5 ++--- pkg/prometheus/prometheus.go | 33 ++++++++++++++++++++------------- test/alert-profile.yaml | 2 +- test/base.sh | 6 +++--- test/kube-burner.yml | 2 +- test/metrics-profile.yaml | 3 +++ test/run.sh | 6 +++--- 8 files changed, 43 insertions(+), 24 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index 95e1b4bb2..56dbd22b8 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -59,6 +59,16 @@ Apart from range queries, kube-burner has the ability to perform instant queries by instant: true ``` +## Using the elapsed variable + +There's a special go-template variable that can be used within the prometheus expression: the variable **elapsed** is set to the value of the job duration (or the range given to index). This variable is especially useful in expressions using [aggregations over time functions](https://prometheus.io/docs/prometheus/latest/querying/functions/#aggregation_over_time). +e.g.: The following expression gets the top 3 CPU usage of the cluster's kubelets + +```yaml +- query: irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m]) * 100 and on (node) topk(3,avg_over_time(irate(process_cpu_seconds_total{service="kubelet",job="kubelet"}[2m])[{{ .elapsed }}:]) and on (node) kube_node_role{role="worker"}) + metricName: kubeletCPU + Examples of metrics profiles can be found in the [examples directory](https://github.com/cloud-bulldozer/kube-burner/tree/master/examples/). 
There're are also ElasticSearch based grafana dashboards available in the same examples directory. ## Job Summary diff --git a/pkg/burner/namespaces.go b/pkg/burner/namespaces.go index ebb337ab9..1c8925f0c 100644 --- a/pkg/burner/namespaces.go +++ b/pkg/burner/namespaces.go @@ -27,14 +27,13 @@ import ( ) func createNamespace(clientset *kubernetes.Clientset, namespaceName string, nsLabels map[string]string) error { - nsLabels["pod-security.kubernetes.io/warn"] = "privileged" ns := v1.Namespace{ ObjectMeta: metav1.ObjectMeta{Name: namespaceName, Labels: nsLabels}, } return RetryWithExponentialBackOff(func() (done bool, err error) { _, err = clientset.CoreV1().Namespaces().Create(context.TODO(), &ns, metav1.CreateOptions{}) if errors.IsForbidden(err) { - log.Fatalf("Authorization error creating namespace %s: %s", ns.Name, err) + log.Fatalf("authorization error creating namespace %s: %s", ns.Name, err) return false, err } if errors.IsAlreadyExists(err) { @@ -46,7 +45,7 @@ func createNamespace(clientset *kubernetes.Clientset, namespaceName string, nsLa } return true, nil } else if err != nil { - log.Errorf("Unexpected error creating namespace %s: %s", ns.Name, err) + log.Errorf("unexpected error creating namespace %s", ns.Name) return false, nil } log.Debugf("Created namespace: %s", ns.Name) diff --git a/pkg/prometheus/prometheus.go b/pkg/prometheus/prometheus.go index e9749b66c..1866d058f 100644 --- a/pkg/prometheus/prometheus.go +++ b/pkg/prometheus/prometheus.go @@ -15,6 +15,7 @@ package prometheus import ( + "bytes" "context" "crypto/tls" "encoding/json" @@ -24,6 +25,7 @@ import ( "os" "path" "strings" + "text/template" "time" "github.com/cloud-bulldozer/kube-burner/log" @@ -113,36 +115,41 @@ func (p *Prometheus) ScrapeMetrics(start, end time.Time, indexer *indexers.Index func (p *Prometheus) ScrapeJobsMetrics(jobList []burner.Executor, indexer *indexers.Indexer) error { start := jobList[0].Start end := jobList[len(jobList)-1].End - var filename string + elapsed := 
int(end.Sub(start).Minutes()) var err error var v model.Value + var renderedQuery bytes.Buffer log.Infof("🔍 Scraping prometheus metrics for benchmark from %s to %s", start, end) for _, md := range p.MetricProfile { var metrics []interface{} + t, _ := template.New("").Parse(md.Query) + t.Execute(&renderedQuery, map[string]string{"elapsed": fmt.Sprintf("%dm", elapsed)}) + query := renderedQuery.String() + renderedQuery.Reset() if md.Instant { - log.Debugf("Instant query: %s", md.Query) - if v, err = p.Query(md.Query, end); err != nil { - log.Warnf("Error found with query %s: %s", md.Query, err) + log.Debugf("Instant query: %s", query) + if v, err = p.Query(query, end); err != nil { + log.Warnf("Error found with query %s: %s", query, err) continue } - if err := p.parseVector(md.MetricName, md.Query, jobList, v, &metrics); err != nil { - log.Warnf("Error found parsing result from query %s: %s", md.Query, err) + if err := p.parseVector(md.MetricName, query, jobList, v, &metrics); err != nil { + log.Warnf("Error found parsing result from query %s: %s", query, err) } } else { - log.Debugf("Range query: %s", md.Query) - p.QueryRange(md.Query, start, end) - v, err = p.QueryRange(md.Query, start, end) + log.Debugf("Range query: %s", query) + p.QueryRange(query, start, end) + v, err = p.QueryRange(query, start, end) if err != nil { - log.Warnf("Error found with query %s: %s", md.Query, err) + log.Warnf("Error found with query %s: %s", query, err) continue } - if err := p.parseMatrix(md.MetricName, md.Query, jobList, v, &metrics); err != nil { - log.Warnf("Error found parsing result from query %s: %s", md.Query, err) + if err := p.parseMatrix(md.MetricName, query, jobList, v, &metrics); err != nil { + log.Warnf("Error found parsing result from query %s: %s", query, err) continue } } if config.ConfigSpec.GlobalConfig.WriteToFile { - filename = fmt.Sprintf("%s-%s.json", md.MetricName, p.uuid) + filename := fmt.Sprintf("%s-%s.json", md.MetricName, p.uuid) if 
config.ConfigSpec.GlobalConfig.MetricsDirectory != "" { err = os.MkdirAll(config.ConfigSpec.GlobalConfig.MetricsDirectory, 0744) if err != nil { diff --git a/test/alert-profile.yaml b/test/alert-profile.yaml index 0c8bee4cc..70320a874 100644 --- a/test/alert-profile.yaml +++ b/test/alert-profile.yaml @@ -2,6 +2,6 @@ severity: critical description: No prometheus instance found -- expr: avg_over_time(prometheus_tsdb_wal_page_flushes_total[{{ .elapsed }}:]) +- expr: avg_over_time(prometheus_tsdb_wal_page_flushes_total[{{ .elapsed }}:]) < 0 severity: warning description: Test diff --git a/test/base.sh b/test/base.sh index 87f4cc90a..9219e2bf7 100644 --- a/test/base.sh +++ b/test/base.sh @@ -1,10 +1,10 @@ #!/bin/bash trap print_events ERR -export QPS=5 -export BURST=10 +export QPS=2 +export BURST=2 export TERM=screen-256color -export JOB_ITERATIONS=4 +export JOB_ITERATIONS=9 bold=$(tput bold) normal=$(tput sgr0) diff --git a/test/kube-burner.yml b/test/kube-burner.yml index 0174af44b..372f9e028 100644 --- a/test/kube-burner.yml +++ b/test/kube-burner.yml @@ -25,7 +25,7 @@ jobs: waitWhenFinished: true verifyObjects: true errorOnVerify: true - jobIterationDelay: 1s + jobIterationDelay: 10s maxWaitTimeout: 2m objects: diff --git a/test/metrics-profile.yaml b/test/metrics-profile.yaml index 9f367e6ae..c9b887c26 100644 --- a/test/metrics-profile.yaml +++ b/test/metrics-profile.yaml @@ -1,6 +1,9 @@ - query: process_resident_memory_bytes{job="prometheus"} metricName: prometheusRSS +- query: irate(process_cpu_seconds_total{job="prometheus"}[2m]) and on (job) topk(2,avg_over_time(process_cpu_seconds_total{job="prometheus"}[{{.elapsed}}:])) + metricName: top2PrometheusCPU + - query: prometheus_build_info metricName: prometheusBuildInfo instant: true diff --git a/test/run.sh b/test/run.sh index 872b14066..3094117f4 100755 --- a/test/run.sh +++ b/test/run.sh @@ -37,7 +37,7 @@ check_running_pods() { } check_files () { - for f in collected-metrics/prometheusRSS-${uuid}.json 
collected-metrics/prometheusRSS-${uuid}.json collected-metrics/namespaced-podLatency.json collected-metrics/namespaced-podLatency-summary.json; do + for f in collected-metrics/top2PrometheusCPU-${uuid}.json collected-metrics/prometheusRSS-${uuid}.json collected-metrics/prometheusRSS-${uuid}.json collected-metrics/namespaced-podLatency.json collected-metrics/namespaced-podLatency-summary.json; do log "Checking file ${f}" if [[ ! -f $f ]]; then log "File ${f} not present" @@ -51,9 +51,9 @@ check_files () { log "Running kube-burner init" timeout 300 kube-burner init -c kube-burner.yml --uuid ${uuid} --log-level=debug -u http://localhost:9090 -m metrics-profile.yaml -a alert-profile.yaml check_files -check_ns kube-burner-job=namespaced,kube-burner-uuid=${uuid} 5 +check_ns kube-burner-job=namespaced,kube-burner-uuid=${uuid} 10 check_destroyed_ns kube-burner-job=not-namespaced,kube-burner-uuid=${uuid} -check_running_pods kube-burner-job=namespaced,kube-burner-uuid=${uuid} 5 +check_running_pods kube-burner-job=namespaced,kube-burner-uuid=${uuid} 10 log "Running kube-burner destroy" kube-burner destroy --uuid ${uuid} check_destroyed_ns kube-burner-job=namespaced,kube-burner-uuid=${uuid}