From 6cc01a5070575d0d9fa167b562e7167792ce902e Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Wed, 12 Jul 2023 01:52:20 +0900 Subject: [PATCH] divide latency of CreateIndex and SaveIndex metrics (#2099) * divide latency of CreateIndex and SaveIndex metrics Signed-off-by: Kosuke Morimoto * divide latency of CreateIndex and SaveIndex Vald Agent metrics Signed-off-by: Kosuke Morimoto --------- Signed-off-by: Kosuke Morimoto Co-authored-by: Yusuke Kato --- .../dashboards/00-vald-cluster-overview.yaml | 166 +++++++++++++--- .../grafana/dashboards/01-vald-agent.yaml | 182 +++++++++++++++--- 2 files changed, 294 insertions(+), 54 deletions(-) diff --git a/k8s/metrics/grafana/dashboards/00-vald-cluster-overview.yaml b/k8s/metrics/grafana/dashboards/00-vald-cluster-overview.yaml index 0a9c2a258a..5822416071 100644 --- a/k8s/metrics/grafana/dashboards/00-vald-cluster-overview.yaml +++ b/k8s/metrics/grafana/dashboards/00-vald-cluster-overview.yaml @@ -108,7 +108,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -186,7 +186,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -264,7 +264,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -324,7 +324,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -429,7 +429,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -507,7 +507,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -590,7 +590,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -674,7 +674,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -733,7 +733,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -823,7 +823,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -932,7 +932,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1054,7 +1054,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1163,7 +1163,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1178,7 +1178,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "intervalFactor": 1, "legendFormat": "{{grpc_server_method}} p50", @@ -1191,7 +1191,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "legendFormat": "{{grpc_server_method}} p95", "range": true, @@ -1203,7 +1203,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "legendFormat": "{{grpc_server_method}} p99", "range": true, @@ -1283,7 +1283,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1379,7 +1379,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1475,7 +1475,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1543,6 +1543,126 @@ data: "align": false } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "hiddenSeries": false, + "id": 183, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.0.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "legendFormat": "{{grpc_server_method}} p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=~\"$ReplicaSet\", target_pod=~\"$ValdAgentPodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "legendFormat": "{{grpc_server_method}} p99", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Latency (Vald Agent Index)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, { "collapsed": false, "datasource": { @@ -1620,7 +1740,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "repeatDirection": "v", "targets": [ { @@ -1682,7 +1802,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1851,7 +1971,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1943,7 +2063,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.4.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", diff --git a/k8s/metrics/grafana/dashboards/01-vald-agent.yaml b/k8s/metrics/grafana/dashboards/01-vald-agent.yaml index 3e10ce103e..92757ed0c4 100644 --- a/k8s/metrics/grafana/dashboards/01-vald-agent.yaml +++ b/k8s/metrics/grafana/dashboards/01-vald-agent.yaml @@ -108,7 +108,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -186,7 +186,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -264,7 +264,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -346,7 +346,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -430,7 +430,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -509,7 +509,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -587,7 +587,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -669,7 +669,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -750,7 +750,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -832,7 +832,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "repeatDirection": "h", "targets": [ { @@ -914,7 +914,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -995,7 +995,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1072,7 +1072,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1146,7 +1146,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1221,7 +1221,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1296,7 +1296,7 @@ data: "text": {}, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1364,7 +1364,7 @@ data: }, "textMode": "auto" }, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "targets": [ { "datasource": { @@ -1422,7 +1422,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1547,7 +1547,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1672,7 +1672,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1768,7 +1768,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "10.0.0", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1887,7 +1887,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -1994,7 +1994,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -2090,7 +2090,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -2199,7 +2199,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -2214,7 +2214,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "intervalFactor": 1, "legendFormat": "{{grpc_server_method}} p50", @@ -2227,7 +2227,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "legendFormat": "{{grpc_server_method}} p95", "range": true, @@ -2239,7 +2239,7 @@ data: "uid": "prometheus" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\"}[$interval])) by (le, grpc_server_method))", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method!~\".*Index$\"}[$interval])) by (le, grpc_server_method))", "interval": "", "legendFormat": "{{grpc_server_method}} p99", "range": true, @@ -2312,7 +2312,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -2374,6 +2374,12 @@ data: "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 0, "fillGradient": 0, "gridPos": { @@ -2383,6 +2389,120 @@ data: "y": 47 }, "hiddenSeries": false, + "id": 44, + "interval": "", + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.0.1", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{grpc_server_method}} p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "legendFormat": "{{grpc_server_method}} p95", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(server_latency_bucket{exported_kubernetes_namespace=\"$Namespace\", kubernetes_name=\"$ReplicaSet\", target_pod=~\"$PodName\", grpc_server_method=~\".*Index$\"}[$interval])) by (le, grpc_server_method))", + "interval": "", + "legendFormat": "{{grpc_server_method}} p99", + "range": true, + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Latency (Index)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "logBase": 1, + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 55 + }, + "hiddenSeries": false, "id": 41, "legend": { "avg": false, @@ -2400,7 +2520,7 @@ data: "alertThreshold": true }, "percentage": false, - "pluginVersion": "9.2.3", + "pluginVersion": "10.0.1", "pointradius": 2, "points": false, "renderer": "flot", @@ -2556,7 +2676,7 @@ data: "auto_count": 30, "auto_min": "10s", "current": { - "selected": true, + "selected": false, "text": "5m", "value": "5m" },