From 0e038b0b669ba7cadd8eef4364e7ca69af0ad7f2 Mon Sep 17 00:00:00 2001 From: Kiichiro YUKAWA Date: Fri, 15 Mar 2024 12:45:05 +0900 Subject: [PATCH] Add base of benchmark operator dashboard (#2430) * :chart_with_upwards_trend: Add base of benchmark operator dashboard Signed-off-by: vankichi * :green_heart: Fix build benchmark images trigger Signed-off-by: vankichi * :green_heart: Add info tag for info metrics Signed-off-by: vankichi * :chart_with_upwards_trend: showing job image and tags Signed-off-by: vankichi * :chart_with_upwards_trend: add bench job status Signed-off-by: vankichi * :recycle: Update dashboard and rename Signed-off-by: vankichi * :recycle: Fix Signed-off-by: vankichi * Update internal/observability/metrics/tools/benchmark/benchmark.go Co-authored-by: Hiroto Funakoshi Signed-off-by: Kiichiro YUKAWA --------- Signed-off-by: vankichi Signed-off-by: Kiichiro YUKAWA Co-authored-by: Hiroto Funakoshi --- .../workflows/dockers-benchmark-job-image.yml | 24 +- .../dockers-benchmark-operator-image.yaml | 24 +- .../crds/valdbenchmarkoperatorrelease.yaml | 9 +- .../templates/deployment.yaml | 7 +- charts/vald-benchmark-operator/values.yaml | 25 +- internal/config/benchmark.go | 4 +- .../metrics/tools/benchmark/benchmark.go | 258 +++ .../metrics/tools/benchmark/benchmark_test.go | 299 +++ .../10-vald-benchmark-operator.yaml | 2053 +++++++++++++++++ k8s/metrics/grafana/deployment.yaml | 6 + .../crds/valdbenchmarkoperatorrelease.yaml | 9 +- k8s/tools/benchmark/operator/deployment.yaml | 12 + pkg/tools/benchmark/operator/config/config.go | 10 - .../benchmark/operator/service/operator.go | 40 + .../benchmark/operator/usecase/benchmarkd.go | 12 +- 15 files changed, 2741 insertions(+), 51 deletions(-) create mode 100644 internal/observability/metrics/tools/benchmark/benchmark.go create mode 100644 internal/observability/metrics/tools/benchmark/benchmark_test.go create mode 100644 k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml diff --git a/.github/workflows/dockers-benchmark-job-image.yml b/.github/workflows/dockers-benchmark-job-image.yml index 4a2d0479f6..4ecd0e4b60 100644 --- a/.github/workflows/dockers-benchmark-job-image.yml +++ b/.github/workflows/dockers-benchmark-job-image.yml @@ -33,10 +33,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" pull_request: @@ -50,10 +50,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" pull_request_target: @@ -67,10 +67,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/job/Dockerfile" - "versions/GO_VERSION" jobs: diff --git a/.github/workflows/dockers-benchmark-operator-image.yaml b/.github/workflows/dockers-benchmark-operator-image.yaml index 2424657297..d56d0cd76f 100644 --- a/.github/workflows/dockers-benchmark-operator-image.yaml +++ b/.github/workflows/dockers-benchmark-operator-image.yaml @@ -33,10 +33,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" pull_request: @@ -50,10 +50,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" pull_request_target: @@ -67,10 +67,10 @@ on: - "!internal/**/*_test.go" - "!internal/db/**" - "apis/grpc/**" - - "pkg/benchmark/operator/**" - - "cmd/benchmark/operator/**" - - "pkg/benchmark/job/**" - - "cmd/benchmark/job/**" + - "pkg/tools/benchmark/operator/**" + - "cmd/tools/benchmark/operator/**" + - "pkg/tools/benchmark/job/**" + - "cmd/tools/benchmark/job/**" - "dockers/tools/benchmark/operator/Dockerfile" - "versions/GO_VERSION" jobs: diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml index 8886ca72ea..92458c639c 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkoperatorrelease.yaml @@ -40,7 +40,7 @@ spec: type: string schema: openAPIV3Schema: - description: ValdBenchmarkScenario is the Schema for the valdbenchmarkscenarios API + description: ValdBenchmarkOperator is the Schema for the valdbenchmarkoperator API type: object properties: apiVersion: @@ -52,7 +52,7 @@ spec: metadata: type: object status: - description: ValdBenchmarkScenarioStatus defines the observed state of ValdBenchmarkScenario + description: ValdBenchmarkOperatorStatus defines the observed state of ValdBenchmarkOperator enum: - NotReady - Completed @@ -68,6 +68,11 @@ spec: annotations: type: object x-kubernetes-preserve-unknown-fields: true + env: + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true image: type: object properties: diff --git a/charts/vald-benchmark-operator/templates/deployment.yaml b/charts/vald-benchmark-operator/templates/deployment.yaml index a1dcf4ab59..14df9b4c2c 100644 --- a/charts/vald-benchmark-operator/templates/deployment.yaml +++ b/charts/vald-benchmark-operator/templates/deployment.yaml @@ -122,11 +122,10 @@ spec: volumeMounts: - name: {{ .Values.name }}-config mountPath: /etc/server + {{- if .Values.env }} env: - - name: JOB_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace + {{- toYaml .Values.env | nindent 12 }} + {{- end }} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File restartPolicy: Always diff --git a/charts/vald-benchmark-operator/values.yaml b/charts/vald-benchmark-operator/values.yaml index ea2ce647b7..3aebcabc36 100644 --- a/charts/vald-benchmark-operator/values.yaml +++ b/charts/vald-benchmark-operator/values.yaml @@ -454,11 +454,11 @@ observability: # @schema {"name": "observability.otlp.attribute", "type": "object"} attribute: # @schema {"name": "observability.otlp.attribute.namespace", "type": "string"} - namespace: "_MY_POD_NAMESPACE_" + namespace: _MY_POD_NAMESPACE_ # @schema {"name": "observability.otlp.attribute.pod_name", "type": "string"} - pod_name: "_MY_POD_NAME_" + pod_name: _MY_POD_NAME_ # @schema {"name": "observability.otlp.attribute.node_name", "type": "string"} - node_name: "_MY_NODE_NAME_" + node_name: _MY_NODE_NAME_ # @schema {"name": "observability.otlp.attribute.service_name", "type": "string"} service_name: "vald-benchmark-operator" # @schema {"name": "observability.otlp.attribute.metrics", "type": "object"} @@ -487,3 +487,22 @@ observability: enabled: false # @schema {"name": "observability.trace.sampling_rate", "type": "integer"} sampling_rate: 1 +# @schema {"name": "env", "type": "array", "items": {"type": "object"}, "anchor": "env"} +# env -- environment variables +env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: JOB_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go index 68272a58bc..fd45f4f293 100644 --- a/internal/config/benchmark.go +++ b/internal/config/benchmark.go @@ -234,8 +234,8 @@ func (b *BenchmarkScenario) Bind() *BenchmarkScenario { // BenchmarkJobImageInfo represents the docker image information for benchmark job. type BenchmarkJobImageInfo struct { - Image string `json:"image,omitempty" yaml:"image"` - PullPolicy string `json:"pull_policy,omitempty" yaml:"pull_policy"` + Image string `info:"image" json:"image,omitempty" yaml:"image"` + PullPolicy string `info:"pull_policy" json:"pull_policy,omitempty" yaml:"pull_policy"` } // Bind binds the actual data from the BenchmarkJobImageInfo receiver fields. diff --git a/internal/observability/metrics/tools/benchmark/benchmark.go b/internal/observability/metrics/tools/benchmark/benchmark.go new file mode 100644 index 0000000000..370e07cbff --- /dev/null +++ b/internal/observability/metrics/tools/benchmark/benchmark.go @@ -0,0 +1,258 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package benchmark + +import ( + "context" + + v1 "github.com/vdaas/vald/internal/k8s/vald/benchmark/api/v1" + "github.com/vdaas/vald/internal/observability/metrics" + "github.com/vdaas/vald/pkg/tools/benchmark/operator/service" + api "go.opentelemetry.io/otel/metric" + view "go.opentelemetry.io/otel/sdk/metric" +) + +const ( + appliedScenarioCount = "benchmark_operator_applied_scenario" + appliedScenarioCountDescription = "Benchmark Operator applied scenario count" + + runningScenarioCount = "benchmark_operator_running_scenario" + runningScenarioCountDescription = "Benchmark Operator running scenario count" + + completeScenarioCount = "benchmark_operator_complete_scenario" + completeScenarioCountDescription = "Benchmark Operator complete scenario count" + + appliedBenchmarkJobCount = "benchmark_operator_applied_benchmark_job" + appliedBenchmarkJobCountDescription = "Benchmark Operator applied benchmark job count" + + runningBenchmarkJobCount = "benchmark_operator_running_benchmark_job" + runningBenchmarkJobCountDescription = "Benchmark Operator running benchmark job count" + + completeBenchmarkJobCount = "benchmark_operator_complete_benchmark_job" + completeBenchmarkJobCountDescription = "Benchmark Operator complete benchmark job count" + + // appliedJobCount = "benchmark_operator_applied_job" + // appliedJobCountDescription = "Benchmark Operator applied job count" + + // runningJobCount = "benchmark_operator_running_job" + // runningJobCountDescription = "Benchmark Operator running job count" + + // completeJobCount = "benchmark_operator_complete_job" + // completeJobCountDescription = "Benchmark Operator complete job count" +) + +const ( + applied = "applied" + running = "running" + complete = "complete" +) + +type operatorMetrics struct { + op service.Operator +} + +func New(om service.Operator) metrics.Metric { + return &operatorMetrics{ + op: om, + } +} + +// TODO: implement here +func (om *operatorMetrics) View() ([]metrics.View, error) { + return []metrics.View{ + view.NewView( + view.Instrument{ + Name: appliedScenarioCount, + Description: appliedScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: runningScenarioCount, + Description: runningScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: completeScenarioCount, + Description: completeScenarioCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: appliedBenchmarkJobCount, + Description: appliedBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: runningBenchmarkJobCount, + Description: runningBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + view.NewView( + view.Instrument{ + Name: completeBenchmarkJobCount, + Description: completeBenchmarkJobCountDescription, + }, + view.Stream{ + Aggregation: view.AggregationLastValue{}, + }, + ), + }, nil +} + +// TODO: implement here +func (om *operatorMetrics) Register(m metrics.Meter) error { + appliedScenarioCount, err := m.Int64ObservableCounter( + appliedScenarioCount, + metrics.WithDescription(appliedScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + runningScenarioCount, err := m.Int64ObservableCounter( + runningScenarioCount, + metrics.WithDescription(runningScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + completeScenarioCount, err := m.Int64ObservableCounter( + completeScenarioCount, + metrics.WithDescription(completeScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + + appliedBenchJobCount, err := m.Int64ObservableCounter( + appliedBenchmarkJobCount, + metrics.WithDescription(appliedScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + runningBenchJobCount, err := m.Int64ObservableCounter( + runningBenchmarkJobCount, + metrics.WithDescription(runningScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + completeBenchJobCount, err := m.Int64ObservableCounter( + completeBenchmarkJobCount, + metrics.WithDescription(completeScenarioCountDescription), + metrics.WithUnit(metrics.Dimensionless), + ) + if err != nil { + return err + } + + // appliedJobCount, err := m.Int64ObservableCounter( + // appliedJobCount, + // metrics.WithDescription(appliedJobCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + // runningJobCount, err := m.Int64ObservableCounter( + // runningJobCount, + // metrics.WithDescription(runningJobCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + // completeJobCount, err := m.Int64ObservableCounter( + // completeBenchmarkJobCount, + // metrics.WithDescription(completeScenarioCountDescription), + // metrics.WithUnit(metrics.Dimensionless), + // ) + // if err != nil { + // return err + // } + + _, err = m.RegisterCallback( + func(_ context.Context, o api.Observer) error { + // scenario status + sst := map[string]int64{ + applied: 0, + running: 0, + complete: 0, + } + for k, v := range om.op.GetScenarioStatus() { + sst[applied] += v + if k == v1.BenchmarkScenarioCompleted { + sst[complete] += v + } else { + sst[running] += v + } + } + o.ObserveInt64(appliedScenarioCount, sst[applied]) + o.ObserveInt64(runningScenarioCount, sst[running]) + o.ObserveInt64(completeScenarioCount, sst[complete]) + + // benchmark job status + bst := map[string]int64{ + applied: 0, + running: 0, + complete: 0, + } + for k, v := range om.op.GetBenchmarkJobStatus() { + bst[applied] += v + if k == v1.BenchmarkJobCompleted { + bst[complete] += v + } else { + bst[running] += v + } + } + o.ObserveInt64(appliedBenchJobCount, bst[applied]) + o.ObserveInt64(runningBenchJobCount, bst[running]) + o.ObserveInt64(completeBenchJobCount, bst[complete]) + return nil + }, + appliedScenarioCount, + runningScenarioCount, + completeScenarioCount, + appliedBenchJobCount, + runningBenchJobCount, + completeBenchJobCount, + // appliedJobCount, + // runningJobCount, + // completeJobCount, + ) + return err +} diff --git a/internal/observability/metrics/tools/benchmark/benchmark_test.go b/internal/observability/metrics/tools/benchmark/benchmark_test.go new file mode 100644 index 0000000000..b444083232 --- /dev/null +++ b/internal/observability/metrics/tools/benchmark/benchmark_test.go @@ -0,0 +1,299 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package benchmark + +import ( + "reflect" + "testing" + + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/observability/metrics" + "github.com/vdaas/vald/internal/test/goleak" + "github.com/vdaas/vald/pkg/tools/benchmark/operator/service" +) + +func TestNew(t *testing.T) { + type args struct { + om service.Operator + } + type want struct { + want metrics.Metric + } + type test struct { + name string + args args + want want + checkFunc func(want, metrics.Metric) error + beforeFunc func(*testing.T, args) + afterFunc func(*testing.T, args) + } + defaultCheckFunc := func(w want, got metrics.Metric) error { + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + om:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + om:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt, test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(tt, test.args) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + + got := New(test.args.om) + if err := checkFunc(test.want, got); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} + +func Test_operatorMetrics_View(t *testing.T) { + type fields struct { + op service.Operator + } + type want struct { + want []metrics.View + err error + } + type test struct { + name string + fields fields + want want + checkFunc func(want, []metrics.View, error) error + beforeFunc func(*testing.T) + afterFunc func(*testing.T) + } + defaultCheckFunc := func(w want, got []metrics.View, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + } + if !reflect.DeepEqual(got, w.want) { + return errors.Errorf("got: \"%#v\",\n\t\t\t\twant: \"%#v\"", got, w.want) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T,) { + t.Helper() + }, + afterFunc: func(t *testing.T,) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T,) { + t.Helper() + }, + afterFunc: func(t *testing.T,) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt) + } + if test.afterFunc != nil { + defer test.afterFunc(tt) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + om := &operatorMetrics{ + op: test.fields.op, + } + + got, err := om.View() + if err := checkFunc(test.want, got, err); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} + +func Test_operatorMetrics_Register(t *testing.T) { + type args struct { + m metrics.Meter + } + type fields struct { + op service.Operator + } + type want struct { + err error + } + type test struct { + name string + args args + fields fields + want want + checkFunc func(want, error) error + beforeFunc func(*testing.T, args) + afterFunc func(*testing.T, args) + } + defaultCheckFunc := func(w want, err error) error { + if !errors.Is(err, w.err) { + return errors.Errorf("got_error: \"%#v\",\n\t\t\t\twant: \"%#v\"", err, w.err) + } + return nil + } + tests := []test{ + // TODO test cases + /* + { + name: "test_case_1", + args: args { + m:nil, + }, + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + }, + */ + + // TODO test cases + /* + func() test { + return test { + name: "test_case_2", + args: args { + m:nil, + }, + fields: fields { + op:nil, + }, + want: want{}, + checkFunc: defaultCheckFunc, + beforeFunc: func(t *testing.T, args args) { + t.Helper() + }, + afterFunc: func(t *testing.T, args args) { + t.Helper() + }, + } + }(), + */ + } + + for _, tc := range tests { + test := tc + t.Run(test.name, func(tt *testing.T) { + tt.Parallel() + defer goleak.VerifyNone(tt, goleak.IgnoreCurrent()) + if test.beforeFunc != nil { + test.beforeFunc(tt, test.args) + } + if test.afterFunc != nil { + defer test.afterFunc(tt, test.args) + } + checkFunc := test.checkFunc + if test.checkFunc == nil { + checkFunc = defaultCheckFunc + } + om := &operatorMetrics{ + op: test.fields.op, + } + + err := om.Register(test.args.m) + if err := checkFunc(test.want, err); err != nil { + tt.Errorf("error = %v", err) + } + }) + } +} diff --git a/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml new file mode 100644 index 0000000000..9ee9d26a62 --- /dev/null +++ b/k8s/metrics/grafana/dashboards/10-vald-benchmark-operator.yaml @@ -0,0 +1,2053 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboards-vald-benchmark-operator +data: + vald-benchmark-operator.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 14, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^vald_version$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Operator Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 16, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^go_version$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"go_version\", \"v$1\", \"go_version\", \"([^v].*)\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Go Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 34, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^go_os$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Go OS", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 300 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 19, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "count(kube_pod_info{namespace=\"$Namespace\", pod=~\"$ReplicaSet.*\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods ($ReplicaSet)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 10000000000 + }, + { + "color": "#d44a3a", + "value": 1000000000000 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 20, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", image!=\"\"})", + "format": "time_series", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total memory working set ($ReplicaSet)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 3 + }, + "id": 17, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^git_commit$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Git Commit", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 3 + }, + "id": 18, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^build_time$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersin": "8.0.1", + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Build at", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 42, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_applied_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "All Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 6 + }, + "id": 43, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_running_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Running Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 6 + }, + "id": 44, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_complete_scenario{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Completed Scenario Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 12, + "y": 6 + }, + "id": 41, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^image$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"image\", \"$1\", \"image\", \"(.*):.*\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Job Image Name", + "transformations": [ + { + "id": "partitionByValues", + "options": { + "keepFields": true, + "naming": { + "asLabels": true + } + } + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 6 + }, + "id": 40, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^image$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "label_replace(benchmark_operator_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"image\", \"$1\", \"image\", \".*:(.*)\")", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Job Image Version", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 45, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_applied_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "All Benchmark Job Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 46, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_running_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Running Benchmark Job Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 9 + }, + "id": 47, + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^Value$/", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(benchmark_operator_complete_benchmark_job{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "Completed Benchmark Job Count", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 39, + "panels": [], + "title": "Operator Metrics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_statefulset_created{statefulset=\"$ReplicaSet\"}) >= 1", + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_deployment_created{deployment=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_daemonset_created{daemonset=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:76", + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:77", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_statefulset_created{statefulset=\"$ReplicaSet\"}) >= 1", + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_deployment_created{deployment=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_daemonset_created{daemonset=\"$ReplicaSet\"}) >= 1", + "hide": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory working set", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:154", + "format": "decbytes", + "logBase": 1, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:155", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 30, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(server_completed_rpcs{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (grpc_server_method, grpc_server_status)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} ({{grpc_server_status}})", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(irate(server_completed_rpcs{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (grpc_server_status)", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total ({{grpc_server_status}})", + "range": true, + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Completed RPCs /s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 21 + }, + "hiddenSeries": false, + "id": 32, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p99", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "goroutine_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{target_pod}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "goroutine count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 38, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.4.0", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(gc_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_node=~\".+\"}[$interval])", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{target_pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "GC count /s", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(kube_pod_info, namespace)", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "Namespace", + "options": [], + "query": { + "query": "label_values(kube_pod_info, namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "vald-benchmark-operator", + "value": "vald-benchmark-operator" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(app_version_info{server_name=~\"benchmark operator.*\"}, kubernetes_name)", + "hide": 0, + "includeAll": false, + "label": "name", + "multi": false, + "name": "ReplicaSet", + "options": [], + "query": { + "query": "label_values(app_version_info{server_name=~\"benchmark operator.*\"}, kubernetes_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(app_version_info{server_name=~\"benchmark operator.*\", kubernetes_name=~\"$ReplicaSet\"}, target_pod)", + "hide": 0, + "includeAll": true, + "label": "pod", + "multi": false, + "name": "PodName", + "options": [], + "query": { + "query": "label_values(app_version_info{server_name=~\"benchmark operator.*\", kubernetes_name=~\"$ReplicaSet\"}, target_pod)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "text": "5m", + "value": "5m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": true, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + } + ], + "query": "1m,2m,5m,10m,30m,1h,6h,12h,1d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Vald Benchmark Operator", + "uid": "fdewjfx1jkxz4b", + "version": 1, + "weekStart": "" + } diff --git a/k8s/metrics/grafana/deployment.yaml b/k8s/metrics/grafana/deployment.yaml index cfdee03814..21555cbe04 100644 --- a/k8s/metrics/grafana/deployment.yaml +++ b/k8s/metrics/grafana/deployment.yaml @@ -51,6 +51,8 @@ spec: mountPath: /var/lib/grafana/dashboards-vald/08 - name: grafana-dashboards-vald-index-correction mountPath: /var/lib/grafana/dashboards-vald/09 + - name: grafana-dashboards-vald-benchmark-operator + mountPath: /var/lib/grafana/dashboards-vald/10 - name: grafana-dashboards-vald-agent-memory mountPath: /var/lib/grafana/dashboards-vald/99 volumes: @@ -94,3 +96,7 @@ spec: configMap: defaultMode: 420 name: grafana-dashboards-vald-agent-memory + - name: grafana-dashboards-vald-benchmark-operator + configMap: + defaultMode: 420 + name: grafana-dashboards-vald-benchmark-operator diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml index 8886ca72ea..92458c639c 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkoperatorrelease.yaml @@ -40,7 +40,7 @@ spec: type: string schema: openAPIV3Schema: - description: ValdBenchmarkScenario is the Schema for the valdbenchmarkscenarios API + description: ValdBenchmarkOperator is the Schema for the valdbenchmarkoperator API type: object properties: apiVersion: @@ -52,7 +52,7 @@ spec: metadata: type: object status: - description: ValdBenchmarkScenarioStatus defines the observed state of ValdBenchmarkScenario + description: ValdBenchmarkOperatorStatus defines the observed state of ValdBenchmarkOperator enum: - NotReady - Completed @@ -68,6 +68,11 @@ spec: annotations: type: object x-kubernetes-preserve-unknown-fields: true + env: + type: array + items: + type: object + x-kubernetes-preserve-unknown-fields: true image: type: object properties: diff --git a/k8s/tools/benchmark/operator/deployment.yaml b/k8s/tools/benchmark/operator/deployment.yaml index 0d011c806f..a5d2394d3b 100644 --- a/k8s/tools/benchmark/operator/deployment.yaml +++ b/k8s/tools/benchmark/operator/deployment.yaml @@ -98,6 +98,18 @@ spec: - name: vald-benchmark-operator-config mountPath: /etc/server env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace - name: JOB_NAMESPACE valueFrom: fieldRef: diff --git a/pkg/tools/benchmark/operator/config/config.go b/pkg/tools/benchmark/operator/config/config.go index 0fe9dadea0..f2c8f76b62 100644 --- a/pkg/tools/benchmark/operator/config/config.go +++ b/pkg/tools/benchmark/operator/config/config.go @@ -35,9 +35,6 @@ type Config struct { // Observability represent observability configurations Observability *config.Observability `json:"observability" yaml:"observability"` - // Scenario represents benchmark scenario configurations - Scenario *config.BenchmarkScenario `json:"scenario" yaml:"scenario"` - // JobImage represents the location of Docker image for benchmark job and its ImagePullPolicy JobImage *config.BenchmarkJobImageInfo `json:"job_image" yaml:"job_image"` } @@ -66,13 +63,6 @@ func NewConfig(path string) (cfg *Config, err error) { } else { cfg.JobImage = new(config.BenchmarkJobImageInfo) } - - if cfg.Scenario != nil { - cfg.Scenario = cfg.Scenario.Bind() - } else { - cfg.Scenario = new(config.BenchmarkScenario) - } - return cfg, nil } diff --git a/pkg/tools/benchmark/operator/service/operator.go b/pkg/tools/benchmark/operator/service/operator.go index 9ddf86a82a..fbd4a05ab8 100644 --- a/pkg/tools/benchmark/operator/service/operator.go +++ b/pkg/tools/benchmark/operator/service/operator.go @@ -38,6 +38,9 @@ import ( type Operator interface { PreStart(context.Context) error Start(context.Context) (<-chan error, error) + GetScenarioStatus() map[v1.ValdBenchmarkScenarioStatus]int64 + GetBenchmarkJobStatus() map[v1.BenchmarkJobStatus]int64 + // GetJobStatus() map[v1.BenchmarkJobStatus]int64 } type scenario struct { @@ -447,6 +450,7 @@ func (o *operator) createBenchmarkJob(ctx context.Context, scenario v1.ValdBench } // set status bj.Status = v1.BenchmarkJobNotReady + // TODO: set metrics // create benchmark job resource c := o.ctrl.GetManager().GetClient() if err := c.Create(ctx, bj); err != nil { @@ -640,6 +644,42 @@ func (o *operator) checkAtomics() error { return nil } +func (o *operator) GetScenarioStatus() map[v1.ValdBenchmarkScenarioStatus]int64 { + m := map[v1.ValdBenchmarkScenarioStatus]int64{ + v1.BenchmarkScenarioAvailable: 0, + v1.BenchmarkScenarioHealthy: 0, + v1.BenchmarkScenarioNotReady: 0, + v1.BenchmarkScenarioCompleted: 0, + } + if sc := o.getAtomicScenario(); sc != nil { + for _, s := range sc { + m[s.Crd.Status] += 1 + } + } + return m +} + +func (o *operator) GetBenchmarkJobStatus() map[v1.BenchmarkJobStatus]int64 { + m := map[v1.BenchmarkJobStatus]int64{ + v1.BenchmarkJobAvailable: 0, + v1.BenchmarkJobHealthy: 0, + v1.BenchmarkJobNotReady: 0, + v1.BenchmarkJobCompleted: 0, + } + if bjs := o.getAtomicBenchJob(); bjs != nil { + for _, bj := range bjs { + m[bj.Status] += 1 + } + } + return m +} + +// func (o *operator) GetJobStatus() map[job.JobStatus]int64 { +// m := map[job.JobStatus]int64{} +// // if js := o.getAtomicJob() +// return m +// } + func (*operator) PreStart(context.Context) error { log.Infof("[benchmark scenario operator] start vald benchmark scenario operator") return nil diff --git a/pkg/tools/benchmark/operator/usecase/benchmarkd.go b/pkg/tools/benchmark/operator/usecase/benchmarkd.go index 448d99f36a..5f4343f615 100644 --- a/pkg/tools/benchmark/operator/usecase/benchmarkd.go +++ b/pkg/tools/benchmark/operator/usecase/benchmarkd.go @@ -27,7 +27,9 @@ import ( "github.com/vdaas/vald/internal/net/grpc" "github.com/vdaas/vald/internal/net/grpc/interceptor/server/recover" "github.com/vdaas/vald/internal/observability" + backoffmetrics "github.com/vdaas/vald/internal/observability/metrics/backoff" infometrics "github.com/vdaas/vald/internal/observability/metrics/info" + benchmarkmetrics "github.com/vdaas/vald/internal/observability/metrics/tools/benchmark" "github.com/vdaas/vald/internal/runner" "github.com/vdaas/vald/internal/safety" "github.com/vdaas/vald/internal/servers/server" @@ -52,11 +54,11 @@ type run struct { var JOB_NAMESPACE = os.Getenv("JOB_NAMESPACE") func New(cfg *config.Config) (r runner.Runner, err error) { - log.Info("pkg/tools/benchmark/scenario/cmd start") + log.Info("pkg/tools/benchmark/operator/cmd start") eg := errgroup.Get() - log.Info("pkg/tools/benchmark/scenario/cmd success d") + log.Info("pkg/tools/benchmark/operator/cmd success d") operator, err := service.New( service.WithErrGroup(eg), @@ -95,7 +97,9 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if cfg.Observability.Enabled { obs, err = observability.NewWithConfig( cfg.Observability, - infometrics.New("vald_benchmark_scenario_info", "Benchmark Scenario info", *cfg.Scenario), + benchmarkmetrics.New(operator), + infometrics.New("benchmark_operator_info", "Benchmark Operator info", *cfg.JobImage), + backoffmetrics.New(), ) if err != nil { return nil, err @@ -125,7 +129,7 @@ func New(cfg *config.Config) (r runner.Runner, err error) { if err != nil { return nil, err } - log.Info("pkg/tools/benchmark/scenario/cmd end") + log.Info("pkg/tools/benchmark/operator/cmd end") return &run{ eg: eg,