diff --git a/operations/alloy-mixin/alerts/clustering.libsonnet b/operations/alloy-mixin/alerts/clustering.libsonnet
index 5dbcf5710f..4fd75c7615 100644
--- a/operations/alloy-mixin/alerts/clustering.libsonnet
+++ b/operations/alloy-mixin/alerts/clustering.libsonnet
@@ -11,7 +11,7 @@ local alert = import './utils/alert.jsonnet';
       if enableK8sCluster then
         'stddev by (cluster, namespace, job, cluster_name) (sum without (state) (cluster_node_peers)) != 0'
       else
-        'stddev by (job) (sum without (state) (cluster_node_peers)) != 0',
+        'stddev by (job, cluster_name) (sum without (state) (cluster_node_peers)) != 0',
       'Cluster is not converging.',
       'Cluster is not converging: nodes report different number of peers in the cluster. Job is {{ $labels.job }}',
       '10m',
@@ -29,8 +29,8 @@ local alert = import './utils/alert.jsonnet';
         count by (cluster, namespace, job, cluster_name) (cluster_node_info)
       |||
       else |||
         sum without (state) (cluster_node_peers) !=
-          on (job) group_left
-          count by (job) (cluster_node_info)
+          on (job, cluster_name) group_left
+          count by (job, cluster_name) (cluster_node_info)
       |||
       ,
       'Nodes report different number of peers vs. the count of observed Alloy metrics.',
@@ -55,7 +55,7 @@ local alert = import './utils/alert.jsonnet';
       if enableK8sCluster then
         'sum by (cluster, namespace, job, cluster_name) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
       else
-        'sum by (job) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
+        'sum by (job, cluster_name) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
       ,
       'Cluster Node Name Conflict.',
       'A node tried to join the cluster with a name conflicting with an existing peer. Job is {{ $labels.job }}',
@@ -68,7 +68,7 @@ local alert = import './utils/alert.jsonnet';
       if enableK8sCluster then
         'sum by (cluster, namespace, job, instance, cluster_name) (cluster_node_peers{state="terminating"}) > 0'
       else
-        'sum by (job, instance) (cluster_node_peers{state="terminating"}) > 0'
+        'sum by (job, instance, cluster_name) (cluster_node_peers{state="terminating"}) > 0'
       ,
       'Cluster node stuck in Terminating state.',
       'There is a node within the cluster that is stuck in Terminating state. Job is {{ $labels.job }}',
@@ -84,7 +84,7 @@ local alert = import './utils/alert.jsonnet';
         ) > 1
       |||
       else |||
         count without (sha256) (
-          max by (sha256, job) (alloy_config_hash and on(job) cluster_node_info)
+          max by (sha256, job, cluster_name) (alloy_config_hash and on(job) cluster_node_info)
         ) > 1
       |||
       ,
diff --git a/operations/alloy-mixin/config.libsonnet b/operations/alloy-mixin/config.libsonnet
index 58fa6101c4..5fa4fa03a8 100644
--- a/operations/alloy-mixin/config.libsonnet
+++ b/operations/alloy-mixin/config.libsonnet
@@ -5,7 +5,7 @@
     enableLokiLogs: true,
     filterSelector: '', #use it to filter specific metric label values, ie: job=~"integrations/alloy"
     k8sClusterSelector: 'cluster=~"$cluster", namespace=~"$namespace"',
-    groupSelector: if self.enableK8sCluster then self.k8sClusterSelector + ', job=~"$job"' else 'job=~"$job"',
+    groupSelector: if self.enableK8sCluster then self.k8sClusterSelector + ', job=~"$job"' else 'job=~"$job"' + if self.enableAlloyCluster then ', cluster_name=~"$alloyCluster"' else '',
     instanceSelector: self.groupSelector + ', instance=~"$instance"',
     logsFilterSelector: '', #use to filter logs originated from alloy, and avoid picking up other platform logs, ie: service_name="alloy"
     dashboardTag: 'alloy-mixin',
diff --git a/operations/alloy-mixin/dashboards/cluster-node.libsonnet b/operations/alloy-mixin/dashboards/cluster-node.libsonnet
index cc5046132d..ff7a6c1919 100644
--- a/operations/alloy-mixin/dashboards/cluster-node.libsonnet
+++ b/operations/alloy-mixin/dashboards/cluster-node.libsonnet
@@ -7,7 +7,8 @@ local filename = 'alloy-cluster-node.json';
   local templateVariables =
     templates.newTemplateVariablesList(
       filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
       includeInstance=true,
       setenceCaseLabels=$._config.useSetenceCaseTemplateLabels),
diff --git a/operations/alloy-mixin/dashboards/cluster-overview.libsonnet b/operations/alloy-mixin/dashboards/cluster-overview.libsonnet
index d5e4ff3fd1..3990e774a2 100644
--- a/operations/alloy-mixin/dashboards/cluster-overview.libsonnet
+++ b/operations/alloy-mixin/dashboards/cluster-overview.libsonnet
@@ -8,7 +8,8 @@ local cluster_node_filename = 'alloy-cluster-node.json';
   local templateVariables =
     templates.newTemplateVariablesList(
       filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
       includeInstance=false,
       setenceCaseLabels=$._config.useSetenceCaseTemplateLabels),
@@ -63,6 +64,7 @@ local cluster_node_filename = 'alloy-cluster-node.json';
             __name__: true,
             cluster: true,
             namespace: true,
+            cluster_name: true,
             state: false,
           },
           indexByName: {},
@@ -102,7 +104,7 @@ local cluster_node_filename = 'alloy-cluster-node.json';
         {
           targetBlank: false,
           title: 'Detail dashboard for node',
-          url: '/d/%(uid)s/alloy-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-job=${job}&var-cluster=${cluster}&var-namespace=${namespace}' % { uid: std.md5(cluster_node_filename) },
+          url: '/d/%(uid)s/alloy-cluster-node?var-instance=${__data.fields.instance}&var-datasource=${datasource}&var-loki_datasource=${loki_datasource}&var-job=${job}&var-cluster=${cluster}&var-namespace=${namespace}&var-alloyCluster=${alloyCluster}' % { uid: std.md5(cluster_node_filename) },
         },
       ],
     },
diff --git a/operations/alloy-mixin/dashboards/controller.libsonnet b/operations/alloy-mixin/dashboards/controller.libsonnet
index c1b5018ee1..23dcaa61a6 100644
--- a/operations/alloy-mixin/dashboards/controller.libsonnet
+++ b/operations/alloy-mixin/dashboards/controller.libsonnet
@@ -7,7 +7,8 @@ local filename = 'alloy-controller.json';
   local templateVariables =
     templates.newTemplateVariablesList(
       filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
       includeInstance=false,
       setenceCaseLabels=$._config.useSetenceCaseTemplateLabels),
diff --git a/operations/alloy-mixin/dashboards/opentelemetry.libsonnet b/operations/alloy-mixin/dashboards/opentelemetry.libsonnet
index 3cf6eda2d6..3e54d490d8 100644
--- a/operations/alloy-mixin/dashboards/opentelemetry.libsonnet
+++ b/operations/alloy-mixin/dashboards/opentelemetry.libsonnet
@@ -19,7 +19,8 @@ local stackedPanelMixin = {
   local templateVariables =
     templates.newTemplateVariablesList(
      filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
      includeInstance=true,
      setenceCaseLabels=$._config.useSetenceCaseTemplateLabels),
diff --git a/operations/alloy-mixin/dashboards/prometheus.libsonnet b/operations/alloy-mixin/dashboards/prometheus.libsonnet
index 879e6afb9b..cdecea257d 100644
--- a/operations/alloy-mixin/dashboards/prometheus.libsonnet
+++ b/operations/alloy-mixin/dashboards/prometheus.libsonnet
@@ -21,7 +21,7 @@ local filename = 'alloy-prometheus-remote-write.json';
       // Scrape success rate
       (
-        panel.new(title='Scrape success rate in $cluster', type='timeseries') +
+        panel.new(title='Scrape success rate in k8s cluster $cluster' + if $._config.enableAlloyCluster then ', alloy cluster $alloyCluster' else '', type='timeseries') +
         panel.withUnit('percentunit') +
         panel.withDescription(|||
           Percentage of targets successfully scraped by prometheus.scrape
@@ -29,7 +29,7 @@ local filename = 'alloy-prometheus-remote-write.json';
           This metric is calculated by dividing the number of targets
           successfully scraped by the total number of targets scraped,
-          across all the namespaces in the selected cluster.
+          across all the namespaces in the selected k8s cluster and alloy cluster.

           Low success rates can indicate a problem with scrape targets,
           stale service discovery, or Alloy misconfiguration.
@@ -37,11 +37,19 @@ local filename = 'alloy-prometheus-remote-write.json';
         panel.withPosition({ x: 0, y: 1 + y_offset, w: 12, h: 10 }) +
         panel.withQueries([
           panel.newQuery(
-            expr=|||
-              sum(up{job=~"$job", cluster=~"$cluster"})
-              /
-              count (up{job=~"$job", cluster=~"$cluster"})
-            |||,
+            expr=
+              if $._config.enableAlloyCluster then
+                |||
+                  sum(up{job=~"$job", cluster=~"$cluster", cluster_name=~"$alloyCluster"})
+                  /
+                  count (up{job=~"$job", cluster=~"$cluster", cluster_name=~"$alloyCluster"})
+                |||
+              else
+                |||
+                  sum(up{job=~"$job", cluster=~"$cluster"})
+                  /
+                  count (up{job=~"$job", cluster=~"$cluster"})
+                |||,
             legendFormat='% of targets successfully scraped',
           ),
         ])
@@ -49,11 +57,11 @@ local filename = 'alloy-prometheus-remote-write.json';
       // Scrape duration
       (
-        panel.new(title='Scrape duration in $cluster', type='timeseries') +
+        panel.new(title='Scrape duration in k8s cluster $cluster' + if $._config.enableAlloyCluster then ', alloy cluster $alloyCluster' else '', type='timeseries') +
         panel.withUnit('s') +
         panel.withDescription(|||
           Duration of successful scrapes by prometheus.scrape components,
-          across all the namespaces in the selected cluster.
+          across all the namespaces in the selected k8s cluster and alloy cluster.

           This metric should be below your configured scrape interval.
           High durations can indicate a problem with a scrape target or
@@ -62,21 +70,39 @@ local filename = 'alloy-prometheus-remote-write.json';
         panel.withPosition({ x: 12, y: 1 + y_offset, w: 12, h: 10 }) +
         panel.withQueries([
           panel.newQuery(
-            expr=|||
-              quantile(0.99, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
-            |||,
+            expr=
+              if $._config.enableAlloyCluster then
+                |||
+                  quantile(0.99, scrape_duration_seconds{job=~"$job", cluster=~"$cluster", cluster_name=~"$alloyCluster"})
+                |||
+              else
+                |||
+                  quantile(0.99, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
+                |||,
             legendFormat='p99',
           ),
           panel.newQuery(
-            expr=|||
-              quantile(0.95, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
-            |||,
+            expr=
+              if $._config.enableAlloyCluster then
+                |||
+                  quantile(0.95, scrape_duration_seconds{job=~"$job", cluster=~"$cluster", cluster_name=~"$alloyCluster"})
+                |||
+              else
+                |||
+                  quantile(0.95, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
+                |||,
             legendFormat='p95',
           ),
           panel.newQuery(
-            expr=|||
-              quantile(0.50, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
-            |||,
+            expr=
+              if $._config.enableAlloyCluster then
+                |||
+                  quantile(0.50, scrape_duration_seconds{job=~"$job", cluster=~"$cluster", cluster_name=~"$alloyCluster"})
+                |||
+              else
+                |||
+                  quantile(0.50, scrape_duration_seconds{job=~"$job", cluster=~"$cluster"})
+                |||,
             legendFormat='p50',
           ),
@@ -89,7 +115,7 @@ local filename = 'alloy-prometheus-remote-write.json';
       // Remote write success rate
       (
-        panel.new(title='Remote write success rate in $cluster', type='timeseries') +
+        panel.new(title='Remote write success rate in k8s cluster $cluster' + if $._config.enableAlloyCluster then ', alloy cluster $alloyCluster' else '', type='timeseries') +
         panel.withUnit('percentunit') +
         panel.withDescription(|||
           Percentage of samples sent by prometheus.remote_write that succeeded.
@@ -118,7 +144,7 @@ local filename = 'alloy-prometheus-remote-write.json';
       // Write latency
       (
-        panel.new(title='Write latency in $cluster', type='timeseries') +
+        panel.new(title='Write latency in k8s cluster $cluster' + if $._config.enableAlloyCluster then ', alloy cluster $alloyCluster' else '', type='timeseries') +
         panel.withUnit('s') +
         panel.withDescription(|||
           Latency of writes to the remote system made by
@@ -430,63 +456,123 @@ local filename = 'alloy-prometheus-remote-write.json';
     local k8sComponentPathQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          |||
       else
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          ||| % $._config,

     local k8sComponentQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          |||
       else
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          ||| % $._config,

     local k8sUrlQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=~"$cluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster=~"$cluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          |||
       else
-        |||
-          label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, cluster=~"$cluster", cluster_name=~"$alloyCluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, cluster=~"$cluster", namespace=~"$namespace", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          ||| % $._config,

     local componentPathQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster_name=~"$alloyCluster", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          |||
       else
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster_name=~"$alloyCluster", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*", component_path=~".*"}, component_path)
+          ||| % $._config,

     local componentQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{cluster_name=~"$alloyCluster", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          |||
       else
-        |||
-          label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, cluster_name=~"$alloyCluster", job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_write_wal_samples_appended_total{%(filterSelector)s, job=~"$job", instance=~"$instance", component_id=~"prometheus.remote_write.*"}, component_id)
+          ||| % $._config,

     local urlQuery =
       if std.isEmpty($._config.filterSelector) then
-        |||
-          label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{job="$job", instance=~"$instance", component_id=~"$component"}, url)
-        |||
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{cluster_name=~"$alloyCluster", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          |||
+        else
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          |||
       else
-        |||
-          label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, job="$job", instance=~"$instance", component_id=~"$component"}, url)
-        ||| % $._config,
+        if $._config.enableAlloyCluster then
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, cluster_name=~"$alloyCluster", job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          ||| % $._config
+        else
+          |||
+            label_values(prometheus_remote_storage_sent_batch_duration_seconds_sum{%(filterSelector)s, job="$job", instance=~"$instance", component_id=~"$component"}, url)
+          ||| % $._config,

     local prometheusTemplateVariables =
       if $._config.enableK8sCluster then
@@ -523,7 +609,8 @@ local filename = 'alloy-prometheus-remote-write.json';
   local templateVariables =
     templates.newTemplateVariablesList(
      filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
      includeInstance=true,
      setenceCaseLabels=$._config.useSetenceCaseTemplateLabels) + prometheusTemplateVariables,
diff --git a/operations/alloy-mixin/dashboards/resources.libsonnet b/operations/alloy-mixin/dashboards/resources.libsonnet
index 47230e171f..7c4b4cda91 100644
--- a/operations/alloy-mixin/dashboards/resources.libsonnet
+++ b/operations/alloy-mixin/dashboards/resources.libsonnet
@@ -31,7 +31,8 @@ local stackedPanelMixin = {
   local templateVariables =
     templates.newTemplateVariablesList(
      filterSelector=$._config.filterSelector,
-      enableK8sCluster=$._config.enableK8sCluster,
+      enableK8sCluster=$._config.enableK8sCluster,
+      enableAlloyCluster=$._config.enableAlloyCluster,
      includeInstance=true,
      setenceCaseLabels=$._config.useSetenceCaseTemplateLabels),
diff --git a/operations/alloy-mixin/dashboards/utils/templates.libsonnet b/operations/alloy-mixin/dashboards/utils/templates.libsonnet
index 98c0da0264..2d0983f8ec 100644
--- a/operations/alloy-mixin/dashboards/utils/templates.libsonnet
+++ b/operations/alloy-mixin/dashboards/utils/templates.libsonnet
@@ -1,7 +1,7 @@
 local dashboard = import './dashboard.jsonnet';

 {
-  newTemplateVariablesList(filterSelector='', enableK8sCluster=true, includeInstance=true, setenceCaseLabels=false):: (
+  newTemplateVariablesList(filterSelector='', enableK8sCluster=true, enableAlloyCluster=true, includeInstance=true, setenceCaseLabels=false):: (

     local clusterTemplateQuery =
       if std.isEmpty(filterSelector) then
@@ -23,6 +23,16 @@ local dashboard = import './dashboard.jsonnet';
           label_values(alloy_component_controller_running_components{%s, cluster=~"$cluster"}, namespace)
         ||| % filterSelector;

+    local k8sAlloyClusterTemplateQuery =
+      if std.isEmpty(filterSelector) then
+        |||
+          label_values(alloy_component_controller_running_components{cluster=~"$cluster", namespace=~"$namespace"}, cluster_name)
+        |||
+      else
+        |||
+          label_values(alloy_component_controller_running_components{%s, cluster=~"$cluster", namespace=~"$namespace"}, cluster_name)
+        ||| % filterSelector;
+
     local k8sJobTemplateQuery =
       if std.isEmpty(filterSelector) then
         |||
@@ -43,6 +53,16 @@ local dashboard = import './dashboard.jsonnet';
           label_values(alloy_component_controller_running_components{%s, cluster=~"$cluster", namespace=~"$namespace", job=~"$job"}, instance)
         ||| % filterSelector;

+    local alloyClusterTemplateQuery =
+      if std.isEmpty(filterSelector) then
+        |||
+          label_values(alloy_component_controller_running_components, cluster_name)
+        |||
+      else
+        |||
+          label_values(alloy_component_controller_running_components{%s}, cluster_name)
+        ||| % filterSelector;
+
     local jobTemplateQuery =
       if std.isEmpty(filterSelector) then
         |||
@@ -78,6 +98,14 @@ local dashboard = import './dashboard.jsonnet';
             query=k8sJobTemplateQuery,
             setenceCaseLabels=setenceCaseLabels),
       ]
+      +
+      if enableAlloyCluster then
+        [
+          dashboard.newTemplateVariable(
+            name='alloyCluster',
+            query=k8sAlloyClusterTemplateQuery,
+            setenceCaseLabels=setenceCaseLabels),
+        ]
+      else []
       +
       if includeInstance then
         [
           dashboard.newMultiTemplateVariable(
@@ -91,8 +119,16 @@ local dashboard = import './dashboard.jsonnet';
           dashboard.newTemplateVariable(
             name='job',
             query=jobTemplateQuery,
-            setenceCaseLabels=setenceCaseLabels),
+            setenceCaseLabels=setenceCaseLabels),
       ]
+      +
+      if enableAlloyCluster then
+        [
+          dashboard.newTemplateVariable(
+            name='alloyCluster',
+            query=alloyClusterTemplateQuery,
+            setenceCaseLabels=setenceCaseLabels),
+        ]
+      else []
       +
       if includeInstance then
         [
           dashboard.newMultiTemplateVariable(
diff --git a/operations/alloy-mixin/jsonnetfile.lock.json b/operations/alloy-mixin/jsonnetfile.lock.json
index 97201a3be0..c07f905a6f 100644
--- a/operations/alloy-mixin/jsonnetfile.lock.json
+++ b/operations/alloy-mixin/jsonnetfile.lock.json
@@ -8,9 +8,19 @@
           "subdir": "gen/grafonnet-v10.0.0"
         }
       },
-      "version": "1c56af39815c4903e47c27194444456f005f65df",
+      "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
       "sum": "xdcrJPJlpkq4+5LpGwN4tPAuheNNLXZjE6tDcyvFjr0="
     },
+    {
+      "source": {
+        "git": {
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-v11.0.0"
+        }
+      },
+      "version": "82a19822e54a0a12a51e24dbd48fcde717dc0864",
+      "sum": "0BvzR0i4bS4hc2O3xDv6i9m52z7mPrjvqxtcPrGhynA="
+    },
     {
       "source": {
         "git": {
@@ -18,8 +28,8 @@
           "subdir": "logs-lib/"
         }
       },
-      "version": "21526e83f442793d5a0c5969867d123915422b79",
-      "sum": "IkBo9nj0Qt1eC9w80dO5SI4yvHzmmXcKx5BK8H8U0Mk="
+      "version": "ab84b9f67c7a7f61e0c0a311afb47a1af4f5903f",
+      "sum": "05RYR0TOrWza0n8lgF9K7naGY7kM6OTBePsoiJw2TUE="
     },
     {
       "source": {
@@ -28,8 +38,8 @@
           "subdir": "logs-lib/logs"
         }
      },
-      "version": "21526e83f442793d5a0c5969867d123915422b79",
-      "sum": "CemcPbsPzyRUchDLH1TKTxWWgBlg1MRT0jH2X172z6w="
+      "version": "ab84b9f67c7a7f61e0c0a311afb47a1af4f5903f",
+      "sum": "7CtlWcss2iTSWi9dmdEpQ/YvlxQYsMd7Ty2390fUriY="
     },
     {
       "source": {