From f9e41294c3171d1beae8a69c405746d7a932fc5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan-Otto=20Kr=C3=B6pke?= Date: Sun, 24 Nov 2024 13:57:19 +0100 Subject: [PATCH] mscluster: fix cluster and network sub collectors (#1759) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan-Otto Kröpke --- internal/collector/mscluster/mscluster.go | 140 ++++++++++++------ .../collector/mscluster/mscluster_cluster.go | 16 +- .../collector/mscluster/mscluster_network.go | 16 +- .../collector/mscluster/mscluster_node.go | 16 +- .../collector/mscluster/mscluster_resource.go | 16 +- .../mscluster/mscluster_resourcegroup.go | 16 +- 6 files changed, 162 insertions(+), 58 deletions(-) diff --git a/internal/collector/mscluster/mscluster.go b/internal/collector/mscluster/mscluster.go index ebc3cad35..ba253fa62 100644 --- a/internal/collector/mscluster/mscluster.go +++ b/internal/collector/mscluster/mscluster.go @@ -8,13 +8,22 @@ import ( "log/slog" "slices" "strings" + "sync" "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus/client_golang/prometheus" ) -const Name = "mscluster" +const ( + Name = "mscluster" + + subCollectorCluster = "cluster" + subCollectorNetwork = "network" + subCollectorNode = "node" + subCollectorResource = "resource" + subCollectorResourceGroup = "resourcegroup" +) type Config struct { CollectorsEnabled []string `yaml:"collectors_enabled"` @@ -22,11 +31,11 @@ type Config struct { var ConfigDefaults = Config{ CollectorsEnabled: []string{ - "cluster", - "network", - "node", - "resource", - "resourcegroup", + subCollectorCluster, + subCollectorNetwork, + subCollectorNode, + subCollectorResource, + subCollectorResourceGroup, }, } @@ -99,27 +108,39 @@ func (c *Collector) Build(_ *slog.Logger, miSession *mi.Session) error { c.miSession = miSession - if slices.Contains(c.config.CollectorsEnabled, "cluster") { - c.buildCluster() + errs := make([]error, 0, 5) + + if slices.Contains(c.config.CollectorsEnabled, subCollectorCluster) { + if err := c.buildCluster(); err != nil { + errs = append(errs, fmt.Errorf("failed to build cluster collector: %w", err)) + } } - if slices.Contains(c.config.CollectorsEnabled, "network") { - c.buildNetwork() + if slices.Contains(c.config.CollectorsEnabled, subCollectorNetwork) { + if err := c.buildNetwork(); err != nil { + errs = append(errs, fmt.Errorf("failed to build network collector: %w", err)) + } } - if slices.Contains(c.config.CollectorsEnabled, "node") { - c.buildNode() + if slices.Contains(c.config.CollectorsEnabled, subCollectorNode) { + if err := c.buildNode(); err != nil { + errs = append(errs, fmt.Errorf("failed to build node collector: %w", err)) + } } - if slices.Contains(c.config.CollectorsEnabled, "resource") { - c.buildResource() + if slices.Contains(c.config.CollectorsEnabled, subCollectorResource) { + if err := c.buildResource(); err != nil { + errs = append(errs, fmt.Errorf("failed to build resource collector: %w", err)) + } } - if slices.Contains(c.config.CollectorsEnabled, "resourcegroup") { - c.buildResourceGroup() + if slices.Contains(c.config.CollectorsEnabled, subCollectorResourceGroup) { + if err := c.buildResourceGroup(); err != nil { + errs = append(errs, fmt.Errorf("failed to build resource group collector: %w", err)) + } } - return nil + return errors.Join(errs...) } // Collect sends the metric values for each metric @@ -129,40 +150,73 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { return nil } - var ( - err error - errs []error - nodeNames []string - ) + errCh := make(chan error, 5) - if slices.Contains(c.config.CollectorsEnabled, "cluster") { - if err = c.collectCluster(ch); err != nil { - errs = append(errs, fmt.Errorf("failed to collect cluster metrics: %w", err)) - } - } + wg := sync.WaitGroup{} + wg.Add(5) - if slices.Contains(c.config.CollectorsEnabled, "network") { - if err = c.collectNetwork(ch); err != nil { - errs = append(errs, fmt.Errorf("failed to collect network metrics: %w", err)) - } - } + go func() { + defer wg.Done() - if slices.Contains(c.config.CollectorsEnabled, "node") { - if nodeNames, err = c.collectNode(ch); err != nil { - errs = append(errs, fmt.Errorf("failed to collect node metrics: %w", err)) + if slices.Contains(c.config.CollectorsEnabled, subCollectorCluster) { + if err := c.collectCluster(ch); err != nil { + errCh <- fmt.Errorf("failed to collect cluster metrics: %w", err) + } } - } + }() + + go func() { + defer wg.Done() - if slices.Contains(c.config.CollectorsEnabled, "resource") { - if err = c.collectResource(ch, nodeNames); err != nil { - errs = append(errs, fmt.Errorf("failed to collect resource metrics: %w", err)) + if slices.Contains(c.config.CollectorsEnabled, subCollectorNetwork) { + if err := c.collectNetwork(ch); err != nil { + errCh <- fmt.Errorf("failed to collect network metrics: %w", err) + } } - } + }() + + go func() { + defer wg.Done() - if slices.Contains(c.config.CollectorsEnabled, "resourcegroup") { - if err = c.collectResourceGroup(ch, nodeNames); err != nil { - errs = append(errs, fmt.Errorf("failed to collect resource group metrics: %w", err)) + nodeNames := make([]string, 0) + + if slices.Contains(c.config.CollectorsEnabled, subCollectorNode) { + var err error + + nodeNames, err = c.collectNode(ch) + if err != nil { + errCh <- fmt.Errorf("failed to collect node metrics: %w", err) + } } + + go func() { + defer wg.Done() + + if slices.Contains(c.config.CollectorsEnabled, subCollectorResource) { + if err := c.collectResource(ch, nodeNames); err != nil { + errCh <- fmt.Errorf("failed to collect resource metrics: %w", err) + } + } + }() + + go func() { + defer wg.Done() + + if slices.Contains(c.config.CollectorsEnabled, subCollectorResourceGroup) { + if err := c.collectResourceGroup(ch, nodeNames); err != nil { + errCh <- fmt.Errorf("failed to collect resource group metrics: %w", err) + } + } + }() + }() + + wg.Wait() + close(errCh) + + errs := make([]error, 0, 5) + + for err := range errCh { + errs = append(errs, err) } return errors.Join(errs...) diff --git a/internal/collector/mscluster/mscluster_cluster.go b/internal/collector/mscluster/mscluster_cluster.go index d6651d02c..59f440d11 100644 --- a/internal/collector/mscluster/mscluster_cluster.go +++ b/internal/collector/mscluster/mscluster_cluster.go @@ -7,13 +7,14 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/types" - "github.com/prometheus-community/windows_exporter/internal/utils" "github.com/prometheus/client_golang/prometheus" ) const nameCluster = Name + "_cluster" type collectorCluster struct { + clusterMIQuery mi.Query + clusterAddEvictDelay *prometheus.Desc clusterAdminAccessPoint *prometheus.Desc clusterAutoAssignNodeSite *prometheus.Desc @@ -177,7 +178,14 @@ type msClusterCluster struct { WitnessRestartInterval uint `mi:"WitnessRestartInterval"` } -func (c *Collector) buildCluster() { +func (c *Collector) buildCluster() error { + clusterMIQuery, err := mi.NewQuery("SELECT * FROM MSCluster_Cluster") + if err != nil { + return fmt.Errorf("failed to create WMI query: %w", err) + } + + c.clusterMIQuery = clusterMIQuery + c.clusterAddEvictDelay = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, nameCluster, "add_evict_delay"), "Provides access to the cluster's AddEvictDelay property, which is the number a seconds that a new node is delayed after an eviction of another node.", @@ -640,11 +648,13 @@ func (c *Collector) buildCluster() { []string{"name"}, nil, ) + + return nil } func (c *Collector) collectCluster(ch chan<- prometheus.Metric) error { var dst []msClusterCluster - if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, utils.Must(mi.NewQuery("SELECT * MSCluster_Cluster"))); err != nil { + if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, c.clusterMIQuery); err != nil { return fmt.Errorf("WMI query failed: %w", err) } diff --git a/internal/collector/mscluster/mscluster_network.go b/internal/collector/mscluster/mscluster_network.go index a62d78c50..37a8010c0 100644 --- a/internal/collector/mscluster/mscluster_network.go +++ b/internal/collector/mscluster/mscluster_network.go @@ -7,13 +7,14 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/types" - "github.com/prometheus-community/windows_exporter/internal/utils" "github.com/prometheus/client_golang/prometheus" ) const nameNetwork = Name + "_network" type collectorNetwork struct { + networkMIQuery mi.Query + networkCharacteristics *prometheus.Desc networkFlags *prometheus.Desc networkMetric *prometheus.Desc @@ -33,7 +34,14 @@ type msClusterNetwork struct { State uint `mi:"State"` } -func (c *Collector) buildNetwork() { +func (c *Collector) buildNetwork() error { + networkMIQuery, err := mi.NewQuery("SELECT * FROM MSCluster_Network") + if err != nil { + return fmt.Errorf("failed to create WMI query: %w", err) + } + + c.networkMIQuery = networkMIQuery + c.networkCharacteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, nameNetwork, "characteristics"), "Provides the characteristics of the network.", @@ -64,6 +72,8 @@ func (c *Collector) buildNetwork() { []string{"name"}, nil, ) + + return nil } // Collect sends the metric values for each metric @@ -71,7 +81,7 @@ func (c *Collector) buildNetwork() { func (c *Collector) collectNetwork(ch chan<- prometheus.Metric) error { var dst []msClusterNetwork - if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, utils.Must(mi.NewQuery("SELECT * MSCluster_Node"))); err != nil { + if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, c.networkMIQuery); err != nil { return fmt.Errorf("WMI query failed: %w", err) } diff --git a/internal/collector/mscluster/mscluster_node.go b/internal/collector/mscluster/mscluster_node.go index 7a32cf0b9..ea02a48f1 100644 --- a/internal/collector/mscluster/mscluster_node.go +++ b/internal/collector/mscluster/mscluster_node.go @@ -7,13 +7,14 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/types" - "github.com/prometheus-community/windows_exporter/internal/utils" "github.com/prometheus/client_golang/prometheus" ) const nameNode = Name + "_node" type collectorNode struct { + nodeMIQuery mi.Query + nodeBuildNumber *prometheus.Desc nodeCharacteristics *prometheus.Desc nodeDetectedCloudPlatform *prometheus.Desc @@ -51,7 +52,14 @@ type msClusterNode struct { StatusInformation uint `mi:"StatusInformation"` } -func (c *Collector) buildNode() { +func (c *Collector) buildNode() error { + nodeMIQuery, err := mi.NewQuery("SELECT * FROM MSCluster_Node") + if err != nil { + return fmt.Errorf("failed to create WMI query: %w", err) + } + + c.nodeMIQuery = nodeMIQuery + c.nodeBuildNumber = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, nameNode, "build_number"), "Provides access to the node's BuildNumber property.", @@ -136,6 +144,8 @@ func (c *Collector) buildNode() { []string{"name"}, nil, ) + + return nil } // Collect sends the metric values for each metric @@ -143,7 +153,7 @@ func (c *Collector) buildNode() { func (c *Collector) collectNode(ch chan<- prometheus.Metric) ([]string, error) { var dst []msClusterNode - if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, utils.Must(mi.NewQuery("SELECT * FROM MSCluster_Node"))); err != nil { + if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, c.nodeMIQuery); err != nil { return nil, fmt.Errorf("WMI query failed: %w", err) } diff --git a/internal/collector/mscluster/mscluster_resource.go b/internal/collector/mscluster/mscluster_resource.go index 2c6839557..d530f2304 100644 --- a/internal/collector/mscluster/mscluster_resource.go +++ b/internal/collector/mscluster/mscluster_resource.go @@ -7,13 +7,14 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/types" - "github.com/prometheus-community/windows_exporter/internal/utils" "github.com/prometheus/client_golang/prometheus" ) const nameResource = Name + "_resource" type collectorResource struct { + resourceMIQuery mi.Query + resourceCharacteristics *prometheus.Desc resourceDeadlockTimeout *prometheus.Desc resourceEmbeddedFailureAction *prometheus.Desc @@ -59,7 +60,14 @@ type msClusterResource struct { Subclass uint `mi:"Subclass"` } -func (c *Collector) buildResource() { +func (c *Collector) buildResource() error { + resourceMIQuery, err := mi.NewQuery("SELECT * FROM MSCluster_Resource") + if err != nil { + return fmt.Errorf("failed to create WMI query: %w", err) + } + + c.resourceMIQuery = resourceMIQuery + c.resourceCharacteristics = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, nameResource, "characteristics"), "Provides the characteristics of the object.", @@ -168,6 +176,8 @@ func (c *Collector) buildResource() { []string{"type", "owner_group", "name"}, nil, ) + + return nil } // Collect sends the metric values for each metric @@ -175,7 +185,7 @@ func (c *Collector) buildResource() { func (c *Collector) collectResource(ch chan<- prometheus.Metric, nodeNames []string) error { var dst []msClusterResource - if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, utils.Must(mi.NewQuery("SELECT * FROM MSCluster_Resource"))); err != nil { + if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, c.resourceMIQuery); err != nil { return fmt.Errorf("WMI query failed: %w", err) } diff --git a/internal/collector/mscluster/mscluster_resourcegroup.go b/internal/collector/mscluster/mscluster_resourcegroup.go index 7fb66f175..a1f068f51 100644 --- a/internal/collector/mscluster/mscluster_resourcegroup.go +++ b/internal/collector/mscluster/mscluster_resourcegroup.go @@ -7,13 +7,14 @@ import ( "github.com/prometheus-community/windows_exporter/internal/mi" "github.com/prometheus-community/windows_exporter/internal/types" - "github.com/prometheus-community/windows_exporter/internal/utils" "github.com/prometheus/client_golang/prometheus" ) const nameResourceGroup = Name + "_resourcegroup" type collectorResourceGroup struct { + resourceGroupMIQuery mi.Query + resourceGroupAutoFailbackType *prometheus.Desc resourceGroupCharacteristics *prometheus.Desc resourceGroupColdStartSetting *prometheus.Desc @@ -51,7 +52,14 @@ type msClusterResourceGroup struct { State uint `mi:"State"` } -func (c *Collector) buildResourceGroup() { +func (c *Collector) buildResourceGroup() error { + resourceGroupMIQuery, err := mi.NewQuery("SELECT * FROM MSCluster_ResourceGroup") + if err != nil { + return fmt.Errorf("failed to create WMI query: %w", err) + } + + c.resourceGroupMIQuery = resourceGroupMIQuery + c.resourceGroupAutoFailbackType = prometheus.NewDesc( prometheus.BuildFQName(types.Namespace, nameResourceGroup, "auto_failback_type"), "Provides access to the group's AutoFailbackType property.", @@ -142,6 +150,8 @@ func (c *Collector) buildResourceGroup() { []string{"name"}, nil, ) + + return nil } // Collect sends the metric values for each metric @@ -149,7 +159,7 @@ func (c *Collector) buildResourceGroup() { func (c *Collector) collectResourceGroup(ch chan<- prometheus.Metric, nodeNames []string) error { var dst []msClusterResourceGroup - if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, utils.Must(mi.NewQuery("SELECT * FROM MSCluster_ResourceGroup"))); err != nil { + if err := c.miSession.Query(&dst, mi.NamespaceRootMSCluster, c.resourceGroupMIQuery); err != nil { return fmt.Errorf("WMI query failed: %w", err) }