From daad25730eede17bbf0d6cb77da8ee2fb45bb073 Mon Sep 17 00:00:00 2001 From: Alexander Apalikov <alexander.apalikov@globant.com> Date: Tue, 5 Mar 2019 14:57:57 +0300 Subject: [PATCH] Adding Kubernetes API server requests metrics New dashboard with dropdown Agones CRD selector. Added docs. Contains 4 graphs as proposed in the ticket. --- .../grafana/dashboard-apiserver-requests.yaml | 978 ++++++++++++++++++ site/content/en/docs/Guides/metrics.md | 2 + 2 files changed, 980 insertions(+) create mode 100644 build/grafana/dashboard-apiserver-requests.yaml diff --git a/build/grafana/dashboard-apiserver-requests.yaml b/build/grafana/dashboard-apiserver-requests.yaml new file mode 100644 index 0000000000..46fcb85261 --- /dev/null +++ b/build/grafana/dashboard-apiserver-requests.yaml @@ -0,0 +1,978 @@ +# Copyright 2019 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# configs map used by grafana +apiVersion: v1 +kind: ConfigMap +metadata: + name: agones-apiserver-requests + namespace: metrics + labels: + grafana_dashboard: "1" +data: + dashboard-agones-apiserver-requests.json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "iteration": 1551904655558, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "API Server Main Resource Request count for Fleet/Gameserver/Gameserverset", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_count{resource=~\"[[CustomResourceDefinition]]\",verb!~\"WATCH|LIST\",subresource=~\"\"}[5m])) by (resource,verb)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Main Resource Request Count per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "reqps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "API Server Request count for Fleet/Gameserver/Gameserverset", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_count{resource=~\"[[CustomResourceDefinition]]\",verb!~\"WATCH|LIST\",subresource!~\"\"}[5m])) by (resource,subresource,verb)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Subresource Request Count per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "reqps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "type": "dashboard" + } + ], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatDirection": "h", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_count{code=\"409\",resource=~\"[[CustomResourceDefinition]]\", subresource=~[[Subresource]]}[5m])) by (code, resource, subresource)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{resource}} {{subresource}} {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Error Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "description": "409 error on Update of Resources", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(apiserver_request_count{code=\"409\",resource=~\"[[CustomResourceDefinition]]\",subresource=[[Subresource]]}[5m])) by (code, resource, subresource)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Update Conflicts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(apiserver_request_latencies_summary{resource=~\"[[CustomResourceDefinition]]\", verb=~\"[[Verb]]\", quantile=\"0.5\"}/1000) by (resource, subresource, verb)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Latency 50%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "description": "Request Latency, 90% quantile with verb selector", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(apiserver_request_latencies_summary{resource=~\"[[CustomResourceDefinition]]\", verb=~\"[[Verb]]\", quantile=\"0.9\"}/1000) by (resource, subresource, verb)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Latency 90%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "description": "Request Latency, 95% quantile with verb selector", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(apiserver_request_latencies_summary{resource=~\"[[CustomResourceDefinition]]\", verb=~\"[[Verb]]\", quantile=\"0.95\"}/1000) by (resource, subresource, verb)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Latency 95%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "description": "Request Latency, 99% quantile with verb selector", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "paceLength": 10, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(apiserver_request_latencies_summary{resource=~\"[[CustomResourceDefinition]]\", verb=~\"[[Verb]]\", quantile=\"0.99\"}/1000) by (resource, subresource, verb)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{verb}} {{resource}} {{subresource}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Request Latency 99%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "agones", + "controller", + "server" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "tags": [], + "text": "All", + "value": [ + "$__all" + ] + }, + "hide": 0, + "includeAll": true, + "label": "CRD", + "multi": true, + "name": "CustomResourceDefinition", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "fleetautoscalers", + "value": "fleetautoscalers" + }, + { + "selected": false, + "text": "fleets", + "value": "fleets" + }, + { + "selected": false, + "text": "gameservers", + "value": "gameservers" + }, + { + "selected": false, + "text": "gameserversets", + "value": "gameserversets" + }, + { + "selected": false, + "text": "gameserverallocations", + "value": "gameserverallocations" + } + ], + "query": "fleets, gameservers, gameserversets, gameserverallocations", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "tags": [], + "text": "\"\"", + "value": "\"\"" + }, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "Subresource", + "options": [ + { + "selected": false, + "text": "\"status\"", + "value": "\"status\"" + }, + { + "selected": false, + "text": "\"scale\"", + "value": "\"scale\"" + }, + { + "selected": true, + "text": "\"\"", + "value": "\"\"" + } + ], + "query": "\"status\", \"scale\", \"\"", + "skipUrlSync": false, + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(verb)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "Verb", + "options": [ + { + "selected": true, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "CONNECT", + "value": "CONNECT" + }, + { + "selected": false, + "text": "CREATE", + "value": "CREATE" + }, + { + "selected": false, + "text": "DELETE", + "value": "DELETE" + }, + { + "selected": false, + "text": "DELETECOLLECTION", + "value": "DELETECOLLECTION" + }, + { + "selected": false, + "text": "GET", + "value": "GET" + }, + { + "selected": false, + "text": "PATCH", + "value": "PATCH" + }, + { + "selected": false, + "text": "POST", + "value": "POST" + }, + { + "selected": false, + "text": "PUT", + "value": "PUT" + }, + { + "selected": false, + "text": "UPDATE", + "value": "UPDATE" + } + ], + "query": "label_values(verb)", + "refresh": 0, + "regex": "/[^WATCH][^LIST]/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Agones Controller API Server Requests", + "uid": "1NHaKICiz", + "version": 1 + } \ No newline at end of file diff --git a/site/content/en/docs/Guides/metrics.md b/site/content/en/docs/Guides/metrics.md index bd96886d54..0495d06f28 100644 --- a/site/content/en/docs/Guides/metrics.md +++ b/site/content/en/docs/Guides/metrics.md @@ -84,6 +84,8 @@ We provide a set of useful [Grafana](https://grafana.com/) dashboards to monitor - {{< ghlink href="/build/grafana/dashboard-goclient-workqueues.yaml" branch="master" >}}Agones Controller go-client workqueues{{< /ghlink >}} displays Agones Controller workqueue processing time and rates. +- {{< ghlink href="/build/grafana/dashboard-apiserver-requests.yaml" branch="master" >}}Agones Controller API Server requests{{< /ghlink >}} displays your current API server request rate, errors rate and request latencies with optional CustomResourceDefinition filtering by Types: fleets, gameserversets, gameservers, gamserverallocations. + Dashboard screenshots : ![grafana dashboard autoscalers](../../../images/grafana-dashboard-autoscalers.png)