Skip to content

Commit

Permalink
Improve Dashboards via chained variables and replace dashboard sync (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
geekflyer authored Oct 15, 2022
1 parent 486bc8e commit 9d91824
Show file tree
Hide file tree
Showing 58 changed files with 3,209 additions and 1,899 deletions.
179 changes: 112 additions & 67 deletions dashboards/api.json

Large diffs are not rendered by default.

Binary file modified dashboards/api.json.gz
Binary file not shown.
198 changes: 120 additions & 78 deletions dashboards/blockchain-health.json

Large diffs are not rendered by default.

Binary file modified dashboards/blockchain-health.json.gz
Binary file not shown.
190 changes: 116 additions & 74 deletions dashboards/component-health-dashboard.json

Large diffs are not rendered by default.

Binary file modified dashboards/component-health-dashboard.json.gz
Binary file not shown.
292 changes: 167 additions & 125 deletions dashboards/consensus.json

Large diffs are not rendered by default.

Binary file modified dashboards/consensus.json.gz
Binary file not shown.
135 changes: 100 additions & 35 deletions dashboards/ddos.json
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id) (aptos_connections{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\"})",
"expr": "sum by (role_type, network_id) (aptos_connections{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\"})",
"legendFormat": "{{role_type}}-{{network_id}}",
"refId": "A"
}
Expand Down Expand Up @@ -170,7 +170,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id) (rate(aptos_network_pending_connection_upgrades{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\" }[$AggInterval]))",
"expr": "sum by (role_type, network_id) (rate(aptos_network_pending_connection_upgrades{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\" }[$AggInterval]))",
"legendFormat": "{{role_type}}-{{network_id}}",
"refId": "A"
}
Expand Down Expand Up @@ -233,7 +233,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "rate(aptos_connections_rejected{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\"}[1m])",
"expr": "rate(aptos_connections_rejected{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", direction=\"inbound\"}[1m])",
"legendFormat": "{{role_type}}-{{network_id}}",
"refId": "A"
}
Expand Down Expand Up @@ -296,7 +296,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id)(rate(aptos_network_rpc_bytes{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"expr": "sum by (role_type, network_id)(rate(aptos_network_rpc_bytes{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{role_type}}-{{network_id}}",
Expand Down Expand Up @@ -361,7 +361,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id)(rate(aptos_network_rpc_messages{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", type=\"request\", state=\"received\"}[$AggInterval]))",
"expr": "sum by (role_type, network_id)(rate(aptos_network_rpc_messages{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", type=\"request\", state=\"received\"}[$AggInterval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{role_type}}-{{network_id}}",
Expand Down Expand Up @@ -426,7 +426,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id)(rate(aptos_network_direct_send_bytes{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"expr": "sum by (role_type, network_id)(rate(aptos_network_direct_send_bytes{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{role_type}}-{{network_id}}",
Expand Down Expand Up @@ -491,7 +491,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "sum by (role_type, network_id)(rate(aptos_network_direct_send_messages{chain_name=~\"$chain_name\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"expr": "sum by (role_type, network_id)(rate(aptos_network_direct_send_messages{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", network_id=~\"$network_id\", role_type=~\"$role_type\", state=\"received\"}[$AggInterval]))",
"legendFormat": "{{role_type}}-{{network_id}}",
"refId": "A"
}
Expand Down Expand Up @@ -554,7 +554,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "rate(aptos_network_rate_limit_sum{chain_name=~\"$chain_name\", namespace=~\"$namespace\", metric=\"allowed\", direction=\"inbound\"}[$AggInterval])",
"expr": "rate(aptos_network_rate_limit_sum{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", metric=\"allowed\", direction=\"inbound\"}[$AggInterval])",
"refId": "A"
}
],
Expand Down Expand Up @@ -616,7 +616,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "rate(aptos_network_rate_limit_sum{chain_name=~\"$chain_name\", namespace=~\"$namespace\", metric=\"throttled\", direction=\"inbound\"}[$AggInterval])",
"expr": "rate(aptos_network_rate_limit_sum{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", metric=\"throttled\", direction=\"inbound\"}[$AggInterval])",
"refId": "A"
}
],
Expand Down Expand Up @@ -962,7 +962,7 @@
"targets": [
{
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"expr": "kubelet_volume_stats_used_bytes{chain_name=~\"$chain_name\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*-($container)-e.*\"} / kubelet_volume_stats_capacity_bytes{chain_name=~\"$chain_name\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*-($container)-e.*\"}",
"expr": "kubelet_volume_stats_used_bytes{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*-($container)-e.*\"} / kubelet_volume_stats_capacity_bytes{chain_name=~\"$chain_name\", cluster=~\"$cluster\", metrics_source=~\"$metrics_source\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*-($container)-e.*\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{persistentvolumeclaim}}",
Expand All @@ -988,7 +988,7 @@
{
"allFormat": "",
"allValue": "",
"current": { "selected": false, "text": "VictoriaMetrics Mainnet US", "value": "VictoriaMetrics Mainnet US" },
"current": { "text": ["VictoriaMetrics Mainnet US"], "value": "VictoriaMetrics Mainnet US" },
"hide": 0,
"includeAll": false,
"label": "",
Expand All @@ -1004,44 +1004,109 @@
"type": "datasource"
},
{
"name": "metrics_source",
"type": "query",
"datasource": null,
"refresh": 1,
"options": [],
"includeAll": true,
"allFormat": "",
"allValue": "",
"current": { "selected": false, "text": "default", "value": "default" },
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"definition": "",
"hide": 0,
"includeAll": false,
"label": "",
"allValue": ".*",
"multi": false,
"multiFormat": "",
"name": "namespace",
"options": [],
"query": { "query": "label_values(namespace)", "refId": "Prometheus-chain-name-Variable-Query" },
"refresh": 1,
"query": {
"query": "label_values(node_process_start_time{}, metrics_source)",
"refId": "StandardVariableQuery"
},
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
"current": { "text": ["telemetry-service"], "value": "telemetry-service" },
"label": "",
"hide": 0,
"sort": 0
},
{
"allFormat": "",
"allValue": "",
"current": { "selected": false, "text": "aws-mainnet-node1", "value": "aws-mainnet-node1" },
"name": "chain_name",
"type": "query",
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"definition": "",
"hide": 0,
"refresh": 1,
"options": [],
"includeAll": false,
"allFormat": "",
"allValue": "",
"multi": false,
"multiFormat": "",
"query": {
"query": "label_values(node_process_start_time{metrics_source=~\"$metrics_source\"}, chain_name)",
"refId": "StandardVariableQuery"
},
"regex": "",
"current": { "text": ["mainnet"], "value": "mainnet" },
"label": "",
"hide": 0,
"sort": 1
},
{
"name": "cluster",
"type": "query",
"datasource": null,
"refresh": 1,
"options": [],
"includeAll": true,
"allFormat": "",
"allValue": ".*",
"multi": false,
"multiFormat": "",
"name": "chain_name",
"query": {
"query": "label_values(node_process_start_time{metrics_source=~\"$metrics_source\", chain_name=~\"$chain_name\"}, cluster)",
"refId": "StandardVariableQuery"
},
"regex": "",
"current": { "text": ["All"], "value": "$__all" },
"label": "",
"hide": 0,
"sort": 0
},
{
"name": "namespace",
"type": "query",
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"refresh": 1,
"options": [],
"query": { "query": "label_values(chain_name)", "refId": "Prometheus-chain-name-Variable-Query" },
"includeAll": true,
"allFormat": "",
"allValue": ".*",
"multi": false,
"multiFormat": "",
"query": {
"query": "label_values(node_process_start_time{metrics_source=~\"$metrics_source\", chain_name=~\"$chain_name\", cluster=~\"$cluster\"}, namespace)",
"refId": "StandardVariableQuery"
},
"regex": "",
"current": { "text": ["All"], "value": "$__all" },
"label": "",
"hide": 0,
"sort": 1
},
{
"name": "kubernetes_pod_name",
"type": "query",
"datasource": { "type": "prometheus", "uid": "${Datasource}" },
"refresh": 1,
"options": [],
"includeAll": true,
"allFormat": "",
"allValue": ".*",
"multi": true,
"multiFormat": "",
"query": {
"query": "label_values(node_process_start_time{metrics_source=~\"$metrics_source\", chain_name=~\"$chain_name\", cluster=~\"$cluster\", namespace=~\"$namespace\"}, kubernetes_pod_name)",
"refId": "StandardVariableQuery"
},
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
"current": { "text": ["All"], "value": ["$__all"] },
"label": "",
"hide": 0,
"sort": 1
},
{
"allFormat": "",
Expand Down
Binary file modified dashboards/ddos.json.gz
Binary file not shown.
Loading

0 comments on commit 9d91824

Please sign in to comment.