From 4bddfed947baa73b85e1b3b86ab9718a01eb574f Mon Sep 17 00:00:00 2001 From: ykadowak Date: Thu, 30 Nov 2023 07:49:37 +0000 Subject: [PATCH] Revert "Add index correction metrics (#2215)" This reverts commit ca118d3111bdba8853c7a1e81d6879a814f02ba1. --- charts/vald/values.yaml | 2 +- .../index/job/correction/correction.go | 108 -- .../dashboards/09-vald-index-correction.yaml | 1410 ----------------- k8s/metrics/grafana/deployment.yaml | 6 - pkg/index/job/correction/service/corrector.go | 37 +- pkg/index/job/correction/usecase/corrector.go | 6 +- 6 files changed, 9 insertions(+), 1560 deletions(-) delete mode 100644 internal/observability/metrics/index/job/correction/correction.go delete mode 100644 k8s/metrics/grafana/dashboards/09-vald-index-correction.yaml diff --git a/charts/vald/values.yaml b/charts/vald/values.yaml index 6d302054fc..46035e6197 100644 --- a/charts/vald/values.yaml +++ b/charts/vald/values.yaml @@ -2676,7 +2676,7 @@ manager: observability: otlp: attribute: - service_name: vald-index-correction + service_name: vald-manager-index # @schema {"name": "manager.index.corrector.enabled", "type": "boolean"} # manager.index.corrector.enabled -- enable index correction CronJob enabled: false diff --git a/internal/observability/metrics/index/job/correction/correction.go b/internal/observability/metrics/index/job/correction/correction.go deleted file mode 100644 index 0e31345e2b..0000000000 --- a/internal/observability/metrics/index/job/correction/correction.go +++ /dev/null @@ -1,108 +0,0 @@ -package correction - -import ( - "context" - - "github.com/vdaas/vald/internal/observability/metrics" - "github.com/vdaas/vald/pkg/index/job/correction/service" - "go.opentelemetry.io/otel/sdk/metric/aggregation" - "go.opentelemetry.io/otel/sdk/metric/view" -) - -const ( - checkedIndexCount = "index_job_correction_checked_index_count" - checkedIndexCountDesc = "The number of checked indexes while index correction job" - - correctedOldIndexCount = "index_job_correction_corrected_old_index_count" - correctedOldIndexCountDesc = "The number of corrected old indexes while index correction job" - - correctedReplicationCount = "index_job_correction_corrected_replication_count" - correctedReplicationCountDesc = "The number of operation happened to correct replication number while index correction job" -) - -type correctionMetrics struct { - correction service.Corrector -} - -func New(c service.Corrector) metrics.Metric { - return &correctionMetrics{ - correction: c, - } -} - -func (*correctionMetrics) View() ([]*metrics.View, error) { - checkedIndexCount, err := view.New( - view.MatchInstrumentName(checkedIndexCount), - view.WithSetDescription(checkedIndexCountDesc), - view.WithSetAggregation(aggregation.LastValue{}), - ) - if err != nil { - return nil, err - } - - oldIndexCount, err := view.New( - view.MatchInstrumentName(correctedOldIndexCount), - view.WithSetDescription(correctedOldIndexCountDesc), - view.WithSetAggregation(aggregation.LastValue{}), - ) - if err != nil { - return nil, err - } - - replicationCount, err := view.New( - view.MatchInstrumentName(correctedReplicationCount), - view.WithSetDescription(correctedReplicationCountDesc), - view.WithSetAggregation(aggregation.LastValue{}), - ) - if err != nil { - return nil, err - } - - return []*metrics.View{ - &checkedIndexCount, - &oldIndexCount, - &replicationCount, - }, nil -} - -func (c *correctionMetrics) Register(m metrics.Meter) error { - checkedIndexCount, err := m.AsyncInt64().Gauge( - checkedIndexCount, - metrics.WithDescription(checkedIndexCountDesc), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } - - oldIndexCount, err := m.AsyncInt64().Gauge( - correctedOldIndexCount, - metrics.WithDescription(correctedOldIndexCountDesc), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } - - replicationCount, err := m.AsyncInt64().Gauge( - correctedReplicationCount, - metrics.WithDescription(correctedReplicationCountDesc), - metrics.WithUnit(metrics.Dimensionless), - ) - if err != nil { - return err - } - - return m.RegisterCallback( - []metrics.AsynchronousInstrument{ - checkedIndexCount, - oldIndexCount, - replicationCount, - }, - func(ctx context.Context) { - checkedIndexCount.Observe(ctx, int64(c.correction.NumberOfCheckedIndex())) - oldIndexCount.Observe(ctx, int64(c.correction.NumberOfCorrectedOldIndex())) - replicationCount.Observe(ctx, int64(c.correction.NumberOfCorrectedReplication())) - }, - ) -} diff --git a/k8s/metrics/grafana/dashboards/09-vald-index-correction.yaml b/k8s/metrics/grafana/dashboards/09-vald-index-correction.yaml deleted file mode 100644 index 031f16864a..0000000000 --- a/k8s/metrics/grafana/dashboards/09-vald-index-correction.yaml +++ /dev/null @@ -1,1410 +0,0 @@ ---- -# -# Copyright (C) 2019-2023 vdaas.org vald team -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# You may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -apiVersion: v1 -kind: ConfigMap -metadata: - name: grafana-dashboards-vald-index-correction -data: - vald-index-correction.json: | - { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 2, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^vald_version$/", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Vald Version", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 4, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^go_version$/", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "label_replace(app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}, \"go_version\", \"v$1\", \"go_version\", \"([^v].*)\")", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Go Version", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 0 - }, - "id": 6, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^go_os$/", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Go OS", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 300 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 0 - }, - "id": 8, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "count(kube_pod_info{namespace=\"$Namespace\", pod=~\"$ReplicaSet.*\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Pods ($ReplicaSet)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "decimals": 2, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 10000000000 - }, - { - "color": "#d44a3a", - "value": 1000000000000 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 10, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", image!=\"\"})", - "format": "time_series", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total memory working set ($ReplicaSet)", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 8, - "x": 0, - "y": 3 - }, - "id": 12, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^git_commit$/", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Git Commit", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 8, - "y": 3 - }, - "id": 14, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^build_time$/", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "app_version_info{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Build at", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 6 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "10.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(irate(container_cpu_usage_seconds_total{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}[$interval])) by (pod) and on() count(kube_job_created{job_name=\"$ReplicaSet\"}) >= 1", - "interval": "", - "legendFormat": "{{pod}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "CPU", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:76", - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:77", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 300 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 6 - }, - "id": 27, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "index_job_correction_checked_index_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "checked index count", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 300 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 6 - }, - "id": 28, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "index_job_correction_corrected_old_index_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "corrected old index count", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 300 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 6 - }, - "id": 29, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "10.2.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "index_job_correction_corrected_replication_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "corrected replication count", - "type": "stat" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 12 - }, - "hiddenSeries": false, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "10.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$Namespace\", container=\"$ReplicaSet\", pod=~\"$PodName\", image!=\"\"}) by (pod) and on() count(kube_job_created{job_name=\"$ReplicaSet\"}) >= 1", - "interval": "", - "legendFormat": "{{pod}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "Memory working set", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:154", - "format": "decbytes", - "logBase": 1, - "min": "0", - "show": true - }, - { - "$$hashKey": "object:155", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "10.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "goroutine_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$PodName\"}", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{target_pod}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "goroutine count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 20 - }, - "hiddenSeries": false, - "id": 26, - "interval": "", - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "10.2.0", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "increase(gc_count{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_node=~\".+\"}[$interval])", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{target_pod}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "GC count /s", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - } - ], - "refresh": "", - "schemaVersion": 38, - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "default", - "value": "default" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values(kube_pod_info, namespace)", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "Namespace", - "options": [], - "query": { - "query": "label_values(kube_pod_info, namespace)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "vald-index-correction", - "value": "vald-index-correction" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values(app_version_info{server_name=~\"index correction job\"}, kubernetes_name)", - "hide": 0, - "includeAll": false, - "label": "name", - "multi": false, - "name": "ReplicaSet", - "options": [], - "query": { - "query": "label_values(app_version_info{server_name=~\"index correction job\"}, kubernetes_name)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": ".+", - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values(app_version_info{server_name=~\"index correction job\", kubernetes_name=\"$ReplicaSet\"}, target_pod)", - "hide": 0, - "includeAll": true, - "label": "pod", - "multi": false, - "name": "PodName", - "options": [], - "query": { - "query": "label_values(app_version_info{server_name=~\"index correction job\", kubernetes_name=\"$ReplicaSet\"}, target_pod)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": true, - "text": "1m", - "value": "1m" - }, - "hide": 0, - "label": "interval", - "name": "interval", - "options": [ - { - "selected": true, - "text": "1m", - "value": "1m" - }, - { - "selected": false, - "text": "2m", - "value": "2m" - }, - { - "selected": false, - "text": "5m", - "value": "5m" - }, - { - "selected": false, - "text": "10m", - "value": "10m" - }, - { - "selected": false, - "text": "30m", - "value": "30m" - }, - { - "selected": false, - "text": "1h", - "value": "1h" - }, - { - "selected": false, - "text": "6h", - "value": "6h" - }, - { - "selected": false, - "text": "12h", - "value": "12h" - }, - { - "selected": false, - "text": "1d", - "value": "1d" - } - ], - "query": "1m,2m,5m,10m,30m,1h,6h,12h,1d", - "refresh": 2, - "skipUrlSync": false, - "type": "interval" - } - ] - }, - "time": { - "from": "now-3h", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "Vald Index Correction", - "uid": "a8fc1362-e1b8-419f-91de-5205c1c82476", - "version": 1, - "weekStart": "" - } diff --git a/k8s/metrics/grafana/deployment.yaml b/k8s/metrics/grafana/deployment.yaml index 19fcf14070..44abf79f98 100644 --- a/k8s/metrics/grafana/deployment.yaml +++ b/k8s/metrics/grafana/deployment.yaml @@ -50,8 +50,6 @@ spec: mountPath: /var/lib/grafana/dashboards-vald/07 - name: grafana-dashboards-vald-lb-gateway mountPath: /var/lib/grafana/dashboards-vald/08 - - name: grafana-dashboards-vald-index-correction - mountPath: /var/lib/grafana/dashboards-vald/09 volumes: - name: grafana-datasource-provider configMap: @@ -85,7 +83,3 @@ spec: configMap: defaultMode: 420 name: grafana-dashboards-vald-lb-gateway - - name: grafana-dashboards-vald-index-correction - configMap: - defaultMode: 420 - name: grafana-dashboards-vald-index-correction diff --git a/pkg/index/job/correction/service/corrector.go b/pkg/index/job/correction/service/corrector.go index 9dfe5abb91..f5553c73ba 100644 --- a/pkg/index/job/correction/service/corrector.go +++ b/pkg/index/job/correction/service/corrector.go @@ -52,23 +52,16 @@ const ( type Corrector interface { Start(ctx context.Context) (<-chan error, error) PreStop(ctx context.Context) error - // For metrics - NumberOfCheckedIndex() uint64 - NumberOfCorrectedOldIndex() uint64 - NumberOfCorrectedReplication() uint64 } type correct struct { - cfg *config.Data - discoverer discoverer.Client - agentAddrs []string - indexInfos sync.Map[string, *payload.Info_Index_Count] - uuidsCount uint32 - uncommittedUUIDsCount uint32 - checkedID bbolt.Bbolt - checkedIndexCount atomic.Uint64 - correctedOldIndexCount atomic.Uint64 - correctedReplicationCount atomic.Uint64 + cfg *config.Data + discoverer discoverer.Client + agentAddrs []string + indexInfos sync.Map[string, *payload.Info_Index_Count] + uuidsCount uint32 + uncommittedUUIDsCount uint32 + checkedID bbolt.Bbolt } const filemode = 0o600 @@ -134,18 +127,6 @@ func (c *correct) PreStop(_ context.Context) error { return c.checkedID.Close(true) } -func (c *correct) NumberOfCheckedIndex() uint64 { - return c.checkedIndexCount.Load() -} - -func (c *correct) NumberOfCorrectedOldIndex() uint64 { - return c.correctedOldIndexCount.Load() -} - -func (c *correct) NumberOfCorrectedReplication() uint64 { - return c.correctedReplicationCount.Load() -} - // skipcq: GO-R1005 func (c *correct) correct(ctx context.Context) (err error) { // leftAgentAddrs is the agents' addr that hasn't been corrected yet. @@ -277,7 +258,6 @@ func (c *correct) correct(ctx context.Context) (err error) { // now this id is checked so set it to the disk cache c.checkedID.AsyncSet(bolteg, []byte(id), nil) - c.checkedIndexCount.Add(1) return nil })) @@ -397,7 +377,6 @@ func (c *correct) correctTimestamp(ctx context.Context, targetReplica *vectorRep latest.vec.GetId(), latest.vec.GetTimestamp(), ) - c.correctedOldIndexCount.Add(1) if err := c.updateObject(ctx, replica.addr, latest.vec); err != nil { return err } @@ -438,7 +417,6 @@ func (c *correct) correctReplica( // when there are less replicas than the correct number, add the extra replicas if diff < 0 { log.Infof("replica shortage of vector %s. inserting to other agents...", targetReplica.vec.GetId()) - c.correctedReplicationCount.Add(1) if len(availableAddrs) == 0 { return errors.ErrNoAvailableAgentToInsert } @@ -464,7 +442,6 @@ func (c *correct) correctReplica( // when there are more replicas than the correct number, delete the extra replicas log.Infof("replica oversupply of vector %s. deleting...", targetReplica.vec.GetId()) - c.correctedReplicationCount.Add(1) // delete from myself if err := c.deleteObject(ctx, targetReplica.addr, targetReplica.vec); err != nil { log.Errorf("failed to delete object from agent(%s): %v", targetReplica.addr, err) diff --git a/pkg/index/job/correction/usecase/corrector.go b/pkg/index/job/correction/usecase/corrector.go index 44907b9c86..337b308f3c 100644 --- a/pkg/index/job/correction/usecase/corrector.go +++ b/pkg/index/job/correction/usecase/corrector.go @@ -26,7 +26,6 @@ import ( "github.com/vdaas/vald/internal/net/grpc" "github.com/vdaas/vald/internal/net/grpc/interceptor/server/recover" "github.com/vdaas/vald/internal/observability" - "github.com/vdaas/vald/internal/observability/metrics/index/job/correction" "github.com/vdaas/vald/internal/runner" "github.com/vdaas/vald/internal/safety" "github.com/vdaas/vald/internal/servers/server" @@ -116,10 +115,7 @@ func New(cfg *config.Data) (r runner.Runner, err error) { var obs observability.Observability if cfg.Observability.Enabled { - obs, err = observability.NewWithConfig( - cfg.Observability, - correction.New(corrector), - ) + obs, err = observability.NewWithConfig(cfg.Observability) if err != nil { log.Error("failed to initialize observability") return nil, err