Skip to content

Commit

Permalink
Add node "status", "scheduling eligibility" to all client metrics (#8925
Browse files Browse the repository at this point in the history
)

- We previously added these to the client host metrics, but it's useful to have them on all client metrics.
- e.g. so you can exclude draining nodes from charts showing your fleet size.
  • Loading branch information
pete-woods authored Sep 22, 2020
1 parent ed385bd commit f40e6ee
Showing 1 changed file with 30 additions and 25 deletions.
55 changes: 30 additions & 25 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2842,7 +2842,7 @@ func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, ba
}

// setGaugeForAllocationStats proxies metrics for allocation specific statistics
func (c *Client) setGaugeForAllocationStats(nodeID string) {
func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.Label) {
c.configLock.RLock()
node := c.configCopy.Node
c.configLock.RUnlock()
Expand All @@ -2852,9 +2852,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {

// Emit allocated
if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "memory"}, float32(allocated.Flattened.Memory.MemoryMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "disk"}, float32(allocated.Shared.DiskMB), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocated", "cpu"}, float32(allocated.Flattened.Cpu.CpuShares), baseLabels)
}

if c.config.BackwardsCompatibleMetrics {
Expand All @@ -2865,7 +2865,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {

for _, n := range allocated.Flattened.Networks {
if !c.config.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
Expand All @@ -2883,9 +2883,9 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {
unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares

if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "unallocated", "cpu"}, float32(unallocatedCpu), baseLabels)
}

if c.config.BackwardsCompatibleMetrics {
Expand All @@ -2905,7 +2905,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string) {

unallocatedMbits := n.MBits - usedMbits
if !c.config.DisableTaggedMetrics {
labels := append(c.baseLabels, metrics.Label{
labels := append(baseLabels, metrics.Label{
Name: "device",
Value: n.Device,
})
Expand All @@ -2932,16 +2932,7 @@ func (c *Client) setGaugeForUptime(hStats *stats.HostStats, baseLabels []metrics
func (c *Client) emitHostStats() {
nodeID := c.NodeID()
hStats := c.hostStatsCollector.Stats()

c.configLock.RLock()
nodeStatus := c.configCopy.Node.Status
nodeEligibility := c.configCopy.Node.SchedulingEligibility
c.configLock.RUnlock()

labels := append(c.baseLabels,
metrics.Label{Name: "node_status", Value: nodeStatus},
metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility},
)
labels := c.labels()

c.setGaugeForMemoryStats(nodeID, hStats, labels)
c.setGaugeForUptime(hStats, labels)
Expand All @@ -2952,8 +2943,9 @@ func (c *Client) emitHostStats() {
// emitClientMetrics emits lower volume client metrics
func (c *Client) emitClientMetrics() {
nodeID := c.NodeID()
labels := c.labels()

c.setGaugeForAllocationStats(nodeID)
c.setGaugeForAllocationStats(nodeID, labels)

// Emit allocation metrics
blocked, migrating, pending, running, terminal := 0, 0, 0, 0, 0
Expand All @@ -2976,11 +2968,11 @@ func (c *Client) emitClientMetrics() {
}

if !c.config.DisableTaggedMetrics {
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), c.baseLabels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "migrating"}, float32(migrating), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "blocked"}, float32(blocked), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "pending"}, float32(pending), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "running"}, float32(running), labels)
metrics.SetGaugeWithLabels([]string{"client", "allocations", "terminal"}, float32(terminal), labels)
}

if c.config.BackwardsCompatibleMetrics {
Expand All @@ -2992,6 +2984,19 @@ func (c *Client) emitClientMetrics() {
}
}

// labels takes the base labels and appends the node state
func (c *Client) labels() []metrics.Label {
c.configLock.RLock()
nodeStatus := c.configCopy.Node.Status
nodeEligibility := c.configCopy.Node.SchedulingEligibility
c.configLock.RUnlock()

return append(c.baseLabels,
metrics.Label{Name: "node_status", Value: nodeStatus},
metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility},
)
}

func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.ComparableResources {
// Unfortunately the allocs only have IP so we need to match them to the
// device
Expand Down

0 comments on commit f40e6ee

Please sign in to comment.