diff --git a/CHANGELOG.md b/CHANGELOG.md index 56fe9494c10..b937ddb2a56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ * Rollout progress dashboard. #8779 * Alertmanager dashboard. #8792 * Ruler dashboard: `cortex_request_duration_seconds` metric. #8795 + * Queries dashboard: `cortex_request_duration_seconds` metric. #8800 * [ENHANCEMENT] Alerts: `MimirRunningIngesterReceiveDelayTooHigh` alert has been tuned to be more reactive to high receive delay. #8538 * [ENHANCEMENT] Dashboards: improve end-to-end latency and strong read consistency panels when experimental ingest storage is enabled. #8543 * [ENHANCEMENT] Dashboards: Add panels for monitoring ingester autoscaling when not using ingest-storage. These panels are disabled by default, but can be enabled using the `autoscaling.ingester.enabled: true` config option. #8484 diff --git a/operations/helm/charts/mimir-distributed/CHANGELOG.md b/operations/helm/charts/mimir-distributed/CHANGELOG.md index ba5cfdcc747..985b55554cc 100644 --- a/operations/helm/charts/mimir-distributed/CHANGELOG.md +++ b/operations/helm/charts/mimir-distributed/CHANGELOG.md @@ -36,6 +36,7 @@ Entries should include a reference to the Pull Request that introduced the chang * Rollout progress dashboard. #8779 * Alertmanager dashboard. #8792 * Ruler dashboard: `cortex_request_duration_seconds` metric. #8795 + * Queries dashboard: `cortex_request_duration_seconds` metric. #8800 * [ENHANCEMENT] Memcached: Update to Memcached 1.6.28 and memcached-exporter 0.14.4. #8557 * [ENHANCEMENT] Add missing fields in multiple topology spread constraints. #8533 * [ENHANCEMENT] Add support for setting the image pull secrets, node selectors, tolerations and topology spread constraints for the Grafana Agent pods used for metamonitoring. #8670 diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index 317b281a6dd..2787479de40 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -13636,6 +13636,35 @@ data: "tagsQuery": "", "type": "query", "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false } ] }, diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json index 45f4fdebc57..d4921f3c52f 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-queries.json @@ -2496,6 +2496,35 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false } ] }, diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-queries.json b/operations/mimir-mixin-compiled/dashboards/mimir-queries.json index 1913205929a..01b96aec49f 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-queries.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-queries.json @@ -2496,6 +2496,35 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false } ] }, diff --git a/operations/mimir-mixin/dashboards/queries.libsonnet b/operations/mimir-mixin/dashboards/queries.libsonnet index 64c325600e2..011d6d39e9b 100644 --- a/operations/mimir-mixin/dashboards/queries.libsonnet +++ b/operations/mimir-mixin/dashboards/queries.libsonnet @@ -6,6 +6,7 @@ local filename = 'mimir-queries.json'; assert std.md5(filename) == 'b3abe8d5c040395cc36615cb4334c92d' : 'UID of the dashboard has changed, please update references to dashboard.'; ($.dashboard('Queries') + { uid: std.md5(filename) }) .addClusterSelectorTemplates() + .addShowNativeLatencyVariable() .addRow( $.row('Query-frontend') .addPanel( @@ -235,23 +236,27 @@ local filename = 'mimir-queries.json'; ||| ) + $.queryPanel( - [ + local ncSumRate = utils.ncHistogramSumBy(utils.ncHistogramCountRate($.queries.ingester.requestsPerSecondMetric, $.queries.ingester.readRequestsPerSecondSelector)); + local scSuccessful = ||| ( sum(rate(cortex_ingest_storage_strong_consistency_requests_total{%s}[$__rate_interval])) - sum(rate(cortex_ingest_storage_strong_consistency_failures_total{%s}[$__rate_interval])) ) - / - sum(rate(cortex_request_duration_seconds_count{%s,route=~"%s"}[$__rate_interval])) - ||| % [$.jobMatcher($._config.job_names.ingester), $.jobMatcher($._config.job_names.ingester), $.jobMatcher($._config.job_names.ingester), $._config.ingester_read_path_routes_regex], + ||| % [$.jobMatcher($._config.job_names.ingester), $.jobMatcher($._config.job_names.ingester)]; + local scFailed = ||| sum(rate(cortex_ingest_storage_strong_consistency_failures_total{%s}[$__rate_interval])) - / - sum(rate(cortex_request_duration_seconds_count{%s,route=~"%s"}[$__rate_interval])) - ||| % [$.jobMatcher($._config.job_names.ingester), $.jobMatcher($._config.job_names.ingester), $._config.ingester_read_path_routes_regex], + ||| % [$.jobMatcher($._config.job_names.ingester)]; + local scRate(sc, rate) = std.join(' / ', [sc, rate]); + [ + scRate(scSuccessful, utils.showClassicHistogramQuery(ncSumRate)), + scRate(scSuccessful, utils.showNativeHistogramQuery(ncSumRate)), + scRate(scFailed, utils.showClassicHistogramQuery(ncSumRate)), + scRate(scFailed, utils.showNativeHistogramQuery(ncSumRate)), ], - ['successful', 'failed'], + ['successful', 'successful', 'failed', 'failed'], ) + $.aliasColors({ failed: $._colors.failed, successful: $._colors.success }) + { fieldConfig+: { defaults+: { unit: 'percentunit', min: 0, max: 1 } } }