From f40e6eed655f43f806b268ef43341b72282aab50 Mon Sep 17 00:00:00 2001 From: Pete Woods Date: Tue, 22 Sep 2020 18:53:50 +0100 Subject: [PATCH] Add node "status", "scheduling eligibility" to all client metrics (#8925) - We previously added these to the client host metrics, but it's useful to have them on all client metrics. - e.g. so you can exclude draining nodes from charts showing your fleet size. --- client/client.go | 55 ++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/client/client.go b/client/client.go index 16339d25eb0..115e5086705 100644 --- a/client/client.go +++ b/client/client.go @@ -2842,7 +2842,7 @@ func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, ba } // setGaugeForAllocationStats proxies metrics for allocation specific statistics -func (c *Client) setGaugeForAllocationStats(nodeID string) { +func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.Label) { c.configLock.RLock() node := c.configCopy.Node c.configLock.RUnlock() @@ -2852,9 +2852,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) { // Emit allocated if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), c.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels) } if c.config.BackwardsCompatibleMetrics { @@ -2865,7 +2865,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) { for _, n := range allocated.Flattened.Networks { if !c.config.DisableTaggedMetrics { - labels := append(c.baseLabels, metrics.Label{ + labels := append(baseLabels, metrics.Label{ Name: "device", Value: n.Device, }) @@ -2883,9 +2883,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) { unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), c.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels) } if c.config.BackwardsCompatibleMetrics { @@ -2905,7 +2905,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) { unallocatedMbits := n.MBits - usedMbits if !c.config.DisableTaggedMetrics { - labels := append(c.baseLabels, metrics.Label{ + labels := append(baseLabels, metrics.Label{ Name: "device", Value: n.Device, }) @@ -2932,16 +2932,7 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics func (c *Client) emitHostStats() { nodeID := c.NodeID() hStats := c.hostStatsCollector.Stats() - - c.configLock.RLock() - nodeStatus := c.configCopy.Node.Status - nodeEligibility := c.configCopy.Node.SchedulingEligibility - c.configLock.RUnlock() - - labels := append(c.baseLabels, - metrics.Label{Name: "node_status", Value: nodeStatus}, - metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility}, - ) + labels := c.labels() c.setGaugeForMemoryStats(nodeID, hStats, labels) c.setGaugeForUptime(hStats, labels) @@ -2952,8 +2943,9 @@ func (c *Client) emitHostStats() { // emitClientMetrics emits lower volume client metrics func (c *Client) emitClientMetrics() { nodeID := c.NodeID() + labels := c.labels() - c.setGaugeForAllocationStats(nodeID) + c.setGaugeForAllocationStats(nodeID, labels) // Emit allocation metrics blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0 @@ -2976,11 +2968,11 @@ func (c *Client) emitClientMetrics() { } if !c.config.DisableTaggedMetrics { - metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), c.baseLabels) - metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), c.baseLabels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels) + metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels) } if c.config.BackwardsCompatibleMetrics { @@ -2992,6 +2984,19 @@ func (c *Client) emitClientMetrics() { } } +// labels takes the base labels and appends the node state +func (c *Client) labels() []metrics.Label { + c.configLock.RLock() + nodeStatus := c.configCopy.Node.Status + nodeEligibility := c.configCopy.Node.SchedulingEligibility + c.configLock.RUnlock() + + return append(c.baseLabels, + metrics.Label{Name: "node_status", Value: nodeStatus}, + metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility}, + ) +} + func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources { // Unfortunately the allocs only have IP so we need to match them to the // device