diff --git a/charts/seed-monitoring/charts/grafana/dashboards/owners/apiserver-overview.json b/charts/seed-monitoring/charts/grafana/dashboards/owners/apiserver-overview.json index 918ca0eaf004..0fd9874128af 100644 --- a/charts/seed-monitoring/charts/grafana/dashboards/owners/apiserver-overview.json +++ b/charts/seed-monitoring/charts/grafana/dashboards/owners/apiserver-overview.json @@ -2748,9 +2748,11 @@ "steppedLine": true, "targets": [ { - "expr": "sum(apiserver_registered_watchers{pod=~\"$apiserver\"})by(kind)", + "exemplar": true, + "expr": " sum by (kind) (apiserver_registered_watchers{pod=~\"$apiserver\",kind!=\"\"})\n + on () group_left ()\n absent(apiserver_longrunning_requests) * 0\nor\n sum by (resource) (apiserver_longrunning_requests{pod=~\"$apiserver\",verb=\"WATCH\"})", + "hide": false, "interval": "", - "legendFormat": "{{kind}}", + "legendFormat": "{{resource}}{{kind}}", "refId": "A" } ], diff --git a/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-api-server-details.json b/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-api-server-details.json index 09824e448a9d..987c3b19a433 100644 --- a/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-api-server-details.json +++ b/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-api-server-details.json @@ -902,13 +902,17 @@ "hiddenSeries": false, "id": 55, "legend": { + "alignAsTable": true, "avg": false, "current": false, - "max": false, + "max": true, "min": false, + "rightSide": true, "show": true, + "sort": "max", + "sortDesc": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -927,9 +931,11 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(apiserver_dropped_requests_total[$rate])) by (requestKind)", - "legendFormat": "{{ requestKind }}", + "exemplar": true, + "expr": "sum(rate(apiserver_request_terminations_total{code=\"429\"}[$__rate_interval])) by (verb, group, version, resource, subresource)", + "hide": false, "interval": "", + "legendFormat": "{{verb}} {{group}}/{{version}}/{{resource}} {{subresource}}", "refId": "A" } ], @@ -939,7 +945,7 @@ "timeShift": null, "title": "Dropped Requests", "tooltip": { - "shared": true, + "shared": false, "sort": 0, "value_type": "individual" }, diff --git a/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-control-plane-status-dashboard.json b/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-control-plane-status-dashboard.json index 7e85e8349d96..760c89291f9d 100644 --- a/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-control-plane-status-dashboard.json +++ b/charts/seed-monitoring/charts/grafana/dashboards/owners/kubernetes-control-plane-status-dashboard.json @@ -1319,17 +1319,21 @@ "steppedLine": false, "targets": [ { - "expr": "apiserver_registered_watchers", + "exemplar": true, + "expr": " sum by (group, version, kind) (apiserver_registered_watchers)\n + on () group_left ()\n absent(apiserver_longrunning_requests) * 0\nor\n sum by (group, version, resource) (apiserver_longrunning_requests)", "format": "time_series", + "hide": false, + "interval": "", "intervalFactor": 1, - "legendFormat": "{{group}}/{{version}}/{{kind}}", + "legendFormat": "{{group}}/{{version}}/{{resource}}{{kind}}", "refId": "A" }, { - "expr": "count(apiserver_registered_watchers)\n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "total", + "exemplar": true, + "expr": "sum(\n sum by (group, version, kind) (apiserver_registered_watchers)\n + on () group_left ()\n absent(apiserver_longrunning_requests) * 0\n or\n sum by (group, version, resource) (apiserver_longrunning_requests)\n)", + "hide": false, + "interval": "", + "legendFormat": "Total", "refId": "B" } ], @@ -1353,15 +1357,17 @@ }, "yaxes": [ { + "$$hashKey": "object:244", "decimals": 0, "format": "short", "label": "Count Watches", - "logBase": 10, + "logBase": 1, "max": null, "min": null, "show": true }, { + "$$hashKey": "object:245", "format": "short", "label": null, "logBase": 1, diff --git a/pkg/operation/botanist/component/kubeapiserver/monitoring.go b/pkg/operation/botanist/component/kubeapiserver/monitoring.go index 910f955807ee..0bc394ae51e7 100644 --- a/pkg/operation/botanist/component/kubeapiserver/monitoring.go +++ b/pkg/operation/botanist/component/kubeapiserver/monitoring.go @@ -43,6 +43,7 @@ const ( monitoringMetricApiserverCRDWebhookConversionDurationSeconds = "apiserver_crd_webhook_conversion_duration_seconds_.+" monitoringMetricApiserverCurrentInflightRequests = "apiserver_current_inflight_requests" monitoringMetricApiserverCurrentInqueueRequests = "apiserver_current_inqueue_requests" + monitoringMetricApiserverLongrunningRequests = "apiserver_longrunning_requests" monitoringMetricApiserverResponseSizes = "apiserver_response_sizes_.+" monitoringMetricApiserverRegisteredWatchers = "apiserver_registered_watchers" monitoringMetricApiserverRequestDurationSeconds = "apiserver_request_duration_seconds_.+" @@ -218,6 +219,7 @@ var ( monitoringMetricApiserverCRDWebhookConversionDurationSeconds, monitoringMetricApiserverCurrentInflightRequests, monitoringMetricApiserverCurrentInqueueRequests, + monitoringMetricApiserverLongrunningRequests, monitoringMetricApiserverResponseSizes, monitoringMetricApiserverRegisteredWatchers, monitoringMetricApiserverRequestDurationSeconds, diff --git a/pkg/operation/botanist/component/kubeapiserver/monitoring_test.go b/pkg/operation/botanist/component/kubeapiserver/monitoring_test.go index fafd4265cc63..a06af9814a10 100644 --- a/pkg/operation/botanist/component/kubeapiserver/monitoring_test.go +++ b/pkg/operation/botanist/component/kubeapiserver/monitoring_test.go @@ -71,7 +71,7 @@ relabel_configs: metric_relabel_configs: - source_labels: [ __name__ ] action: keep - regex: ^(authentication_attempts|authenticated_user_requests|apiserver_admission_controller_admission_duration_seconds_.+|apiserver_admission_webhook_admission_duration_seconds_.+|apiserver_admission_step_admission_duration_seconds_.+|apiserver_admission_webhook_rejection_count|apiserver_audit_event_total|apiserver_audit_error_total|apiserver_audit_requests_rejected_total|apiserver_latency_seconds|apiserver_crd_webhook_conversion_duration_seconds_.+|apiserver_current_inflight_requests|apiserver_current_inqueue_requests|apiserver_response_sizes_.+|apiserver_registered_watchers|apiserver_request_duration_seconds_.+|apiserver_request_terminations_total|apiserver_request_total|apiserver_request_count|apiserver_storage_transformation_duration_seconds_.+|apiserver_storage_transformation_operations_total|apiserver_init_events_total|apiserver_watch_events_sizes_.+|apiserver_watch_events_total|etcd_db_total_size_in_bytes|apiserver_storage_db_total_size_in_bytes|etcd_object_counts|apiserver_storage_objects|etcd_request_duration_seconds_.+|go_.+|process_max_fds|process_open_fds|watch_cache_capacity_increase_total|watch_cache_capacity_decrease_total|watch_cache_capacity|apiserver_cache_list_.+|apiserver_storage_list_.+)$ + regex: ^(authentication_attempts|authenticated_user_requests|apiserver_admission_controller_admission_duration_seconds_.+|apiserver_admission_webhook_admission_duration_seconds_.+|apiserver_admission_step_admission_duration_seconds_.+|apiserver_admission_webhook_rejection_count|apiserver_audit_event_total|apiserver_audit_error_total|apiserver_audit_requests_rejected_total|apiserver_latency_seconds|apiserver_crd_webhook_conversion_duration_seconds_.+|apiserver_current_inflight_requests|apiserver_current_inqueue_requests|apiserver_longrunning_requests|apiserver_response_sizes_.+|apiserver_registered_watchers|apiserver_request_duration_seconds_.+|apiserver_request_terminations_total|apiserver_request_total|apiserver_request_count|apiserver_storage_transformation_duration_seconds_.+|apiserver_storage_transformation_operations_total|apiserver_init_events_total|apiserver_watch_events_sizes_.+|apiserver_watch_events_total|etcd_db_total_size_in_bytes|apiserver_storage_db_total_size_in_bytes|etcd_object_counts|apiserver_storage_objects|etcd_request_duration_seconds_.+|go_.+|process_max_fds|process_open_fds|watch_cache_capacity_increase_total|watch_cache_capacity_decrease_total|watch_cache_capacity|apiserver_cache_list_.+|apiserver_storage_list_.+)$ ` expectedAlertingRule = `groups: