Skip to content

Commit

Permalink
Add label cluster_name to alloy_config_hash and some cluster_ metrics (
Browse files Browse the repository at this point in the history
…#1679)

* Add cluster name in metric alloy_config_hash via the label alloy_cluster

* changelog entry

* bump ckit version to get cluster_name label on cluster metrics

* change label to cluster_name to be in line with ckit

* simplify config_metrics logic because empty labels are ignored

* update alerts with cluster_name label
  • Loading branch information
wildum committed Sep 13, 2024
1 parent 9e290c6 commit b83c8eb
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 46 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ v1.4.0-rc.0

- `prometheus.exporter.cloudwatch` can now collect metrics from custom namespaces via the `custom_namespace` block. (@ptodev)

- Add the label `cluster_name` to the metric `alloy_config_hash` when the flag `cluster.name` is set, to help differentiate between
configs from the same Alloy cluster or different Alloy clusters. (@wildum)

### Bugfixes

- Fix a bug where the scrape timeout for a Probe resource was not applied, overwriting the scrape interval instead. (@morremeyer, @stefanandres)
Expand Down
12 changes: 6 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/grafana/alloy/syntax v0.1.0
github.com/grafana/beyla v1.8.2
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb
github.com/grafana/go-gelf/v2 v2.0.1
Expand Down Expand Up @@ -154,7 +154,7 @@ require (
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.66.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.66.0
github.com/prometheus/blackbox_exporter v0.24.1-0.20230623125439-bd22efa1c900
github.com/prometheus/client_golang v1.20.2
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.55.0
github.com/prometheus/common/sigv4 v0.1.0
Expand Down Expand Up @@ -242,13 +242,13 @@ require (
go.uber.org/goleak v1.3.0
go.uber.org/multierr v1.11.0
go.uber.org/zap v1.27.0
golang.org/x/crypto v0.26.0
golang.org/x/crypto v0.27.0
golang.org/x/crypto/x509roots/fallback v0.0.0-20240208163226-62c9f1799c91
golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842
golang.org/x/net v0.28.0
golang.org/x/net v0.29.0
golang.org/x/oauth2 v0.22.0
golang.org/x/sys v0.25.0
golang.org/x/text v0.17.0
golang.org/x/text v0.18.0
golang.org/x/time v0.5.0
golang.org/x/tools v0.23.0
google.golang.org/api v0.188.0
Expand Down Expand Up @@ -791,7 +791,7 @@ require (
golang.org/x/arch v0.7.0 // indirect
golang.org/x/mod v0.19.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/term v0.24.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
gonum.org/v1/gonum v0.15.1 // indirect
Expand Down
24 changes: 12 additions & 12 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1196,8 +1196,8 @@ github.com/grafana/cadvisor v0.0.0-20240729082359-1f04a91701e2 h1:ju6EcY2aEobeBg
github.com/grafana/cadvisor v0.0.0-20240729082359-1f04a91701e2/go.mod h1:8sLW/G7rcFe1CKMaA4pYT4mX3P1xQVGqM6luzEzx/2g=
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d h1:6sNPBwOokfCxAyateu7iLdtyWDUzaLLShPs7F4eTLfw=
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d/go.mod h1:aGPSALDAkw18nn8M7gumhM/MbJG+zgOA3jNWTwPYtLg=
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa h1:3rdc/z801roM6ky8cT8wz4tahQWkTxJ4VAmzANZe8qQ=
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa/go.mod h1:k21VjCNs7gj1pAV80wb1577fVRePk51Hek5QUMEvKE0=
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88 h1:GgbYRGz2+/Vgz8/lk19Ht8TQDsAudl51Qenuw+COs5k=
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88/go.mod h1:dDqep1rKTbq2ppMYEgIM88GaPXHp4i6Cp3qantiloA0=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 h1:qhugDMdQ4Vp68H0tp/0iN17DM2ehRo1rLEdOFe/gB8I=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2/go.mod h1:w/aiO1POVIeXUQyl0VQSZjl5OAGDTL5aX+4v0RA1tcw=
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb h1:AWE6+kvtE18HP+lRWNUCyvymyrFSXs6TcS2vXIXGIuw=
Expand Down Expand Up @@ -2156,8 +2156,8 @@ github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY=
github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg=
github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
Expand Down Expand Up @@ -2769,8 +2769,8 @@ golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A=
golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70=
golang.org/x/crypto/x509roots/fallback v0.0.0-20240208163226-62c9f1799c91 h1:Lyizcy9jX02jYR0ceBkL6S+jRys8Uepf7wt1vrz6Ras=
golang.org/x/crypto/x509roots/fallback v0.0.0-20240208163226-62c9f1799c91/go.mod h1:kNa9WdvYnzFwC79zRpLRMJbdEFlhyM5RPFBBZp/wWH8=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
Expand Down Expand Up @@ -2896,8 +2896,8 @@ golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
golang.org/x/oauth2 v0.0.0-20170807180024-9a379c6b3e95/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down Expand Up @@ -3070,8 +3070,8 @@ golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/term v0.15.0/go.mod h1:BDl952bC7+uMoWR75FIrCDx79TPU9oHkTZ9yRbYOrX0=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand All @@ -3092,8 +3092,8 @@ golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
Expand Down
5 changes: 1 addition & 4 deletions internal/alloycli/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,7 @@ func (fr *alloyRun) Run(configPath string) error {
ready = f.Ready
reload = func() (*alloy_runtime.Source, error) {
alloySource, err := loadAlloySource(configPath, fr.configFormat, fr.configBypassConversionErrors, fr.configExtraArgs)
defer instrumentation.InstrumentSHA256(alloySource.SHA256())
defer instrumentation.InstrumentLoad(err == nil)
defer instrumentation.InstrumentConfig(err == nil, alloySource.SHA256(), fr.clusterName)

if err != nil {
return nil, fmt.Errorf("reading config path %q: %w", configPath, err)
Expand Down Expand Up @@ -476,8 +475,6 @@ func loadAlloySource(path string, converterSourceFormat string, converterBypassE
}
}

instrumentation.InstrumentConfig(bb)

return alloy_runtime.ParseSource(path, bb)
}

Expand Down
31 changes: 13 additions & 18 deletions internal/static/config/instrumentation/config_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@ type configMetrics struct {
var confMetrics *configMetrics
var configMetricsInitializer sync.Once

func initializeConfigMetrics() {
confMetrics = newConfigMetrics()
func initializeConfigMetrics(clusterName string) {
confMetrics = newConfigMetrics(clusterName)
}

func newConfigMetrics() *configMetrics {
func newConfigMetrics(clusterName string) *configMetrics {
var m configMetrics

m.configHash = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "alloy_config_hash",
Help: "Hash of the currently active config file.",
ConstLabels: prometheus.Labels{
"cluster_name": clusterName,
},
},
[]string{"sha256"},
)
Expand All @@ -49,27 +52,19 @@ func newConfigMetrics() *configMetrics {
return &m
}

// Create a sha256 hash of the config before expansion and expose it via
// the alloy_config_hash metric.
func InstrumentConfig(buf []byte) {
InstrumentSHA256(sha256.Sum256(buf))
}

// InstrumentSHA256 stores the provided hash to the alloy_config_hash metric.
func InstrumentSHA256(hash [sha256.Size]byte) {
configMetricsInitializer.Do(initializeConfigMetrics)
confMetrics.configHash.Reset()
confMetrics.configHash.WithLabelValues(fmt.Sprintf("%x", hash)).Set(1)
}
func InstrumentConfig(success bool, hash [sha256.Size]byte, clusterName string) {
configMetricsInitializer.Do(func() {
initializeConfigMetrics(clusterName)
})

// Expose metrics for load success / failures.
func InstrumentLoad(success bool) {
configMetricsInitializer.Do(initializeConfigMetrics)
if success {
confMetrics.configLoadSuccessSeconds.SetToCurrentTime()
confMetrics.configLoadSuccess.Set(1)
} else {
confMetrics.configLoadSuccess.Set(0)
confMetrics.configLoadFailures.Inc()
}

confMetrics.configHash.Reset()
confMetrics.configHash.WithLabelValues(fmt.Sprintf("%x", hash)).Set(1)
}
12 changes: 6 additions & 6 deletions operations/alloy-mixin/alerts/clustering.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNotConverging',
if enableK8sCluster then
'stddev by (cluster, namespace, job) (sum without (state) (cluster_node_peers)) != 0'
'stddev by (cluster, namespace, job, cluster_name) (sum without (state) (cluster_node_peers)) != 0'
else
'stddev by (job) (sum without (state) (cluster_node_peers)) != 0',
'Cluster is not converging.',
Expand All @@ -25,8 +25,8 @@ local alert = import './utils/alert.jsonnet';
// metrics.
if enableK8sCluster then |||
sum without (state) (cluster_node_peers) !=
on (cluster, namespace, job) group_left
count by (cluster, namespace, job) (cluster_node_info)
on (cluster, namespace, job, cluster_name) group_left
count by (cluster, namespace, job, cluster_name) (cluster_node_info)
||| else |||
sum without (state) (cluster_node_peers) !=
on (job) group_left
Expand All @@ -53,7 +53,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNodeNameConflict',
if enableK8sCluster then
'sum by (cluster, namespace, job) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
'sum by (cluster, namespace, job, cluster_name) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
else
'sum by (job) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
,
Expand All @@ -66,7 +66,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNodeStuckTerminating',
if enableK8sCluster then
'sum by (cluster, namespace, job, instance) (cluster_node_peers{state="terminating"}) > 0'
'sum by (cluster, namespace, job, instance, cluster_name) (cluster_node_peers{state="terminating"}) > 0'
else
'sum by (job, instance) (cluster_node_peers{state="terminating"}) > 0'
,
Expand All @@ -80,7 +80,7 @@ local alert = import './utils/alert.jsonnet';
'ClusterConfigurationDrift',
if enableK8sCluster then |||
count without (sha256) (
max by (cluster, namespace, sha256, job) (alloy_config_hash and on(cluster, namespace, job) cluster_node_info)
max by (cluster, namespace, sha256, job, cluster_name) (alloy_config_hash and on(cluster, namespace, job, cluster_name) cluster_node_info)
) > 1
||| else |||
count without (sha256) (
Expand Down

0 comments on commit b83c8eb

Please sign in to comment.