Skip to content
This repository has been archived by the owner on Mar 27, 2024. It is now read-only.

feat(consul): collect agent metrics #900

Merged
merged 2 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 113 additions & 4 deletions modules/consul/charts.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,119 @@ import (
)

// Chart priorities. Every name without an explicit expression repeats the
// preceding "module.Priority + iota" expression, so each constant is one
// higher than the one declared before it and the declaration order below
// fixes the relative priority of the charts that reference these constants.
// NOTE(review): this hunk shows prioServiceHealthCheckStatus both on the
// first line and again near the end of the block — confirm which one the
// final file keeps.
const (
prioServiceHealthCheckStatus = module.Priority + iota
_ = module.Priority + iota
prioServerLeadershipStatus

// Autopilot charts.
prioAutopilotHealthState
prioAutopilotFailureTolerance

// Process memory charts.
prioMemoryAllocated
prioMemorySys

// Garbage collection chart.
prioGCPauseTime

// Client RPC chart.
prioRPCRequests

// Per-check charts.
prioServiceHealthCheckStatus
prioUnboundHealthCheckStatus
)

var (
// globalCharts is the set of agent-metric charts; New copies it into every
// Consul instance. Each entry is a copy of one of the chart definitions
// declared below in this block.
globalCharts = module.Charts{
chartServerLeadershipStatus.Copy(),
chartAutopilotHealthState.Copy(),
chartAutopilotFailureTolerance.Copy(),
chartMemoryAllocated.Copy(),
chartMemorySys.Copy(),
chartGCPauseTime.Copy(),
chartClientRPCRequestsRate.Copy(),
}

// chartServerLeadershipStatus shows whether this server is the leader.
// Its two dimensions are the ".yes"/".no" keys that collectAgentMetrics
// derives from the "consul.server.isLeader" gauge (exactly one of them is 1
// whenever the gauge is present).
chartServerLeadershipStatus = module.Chart{
ID: "server_leadership_status",
Title: "Server leadership status",
Units: "status",
Fam: "leadership",
Ctx: "consul.server_leadership_status",
Priority: prioServerLeadershipStatus,
Dims: module.Dims{
{ID: "consul.server.isLeader.yes", Name: "leader"},
{ID: "consul.server.isLeader.no", Name: "not_leader"},
},
}

// chartAutopilotHealthState shows the autopilot health flag. Its dimensions
// are the ".yes"/".no" keys that collectAgentMetrics derives from the
// "consul.autopilot.healthy" gauge (a gauge value of 1 sets "healthy",
// any other value sets "unhealthy").
chartAutopilotHealthState = module.Chart{
ID: "autopilot_health_state",
Title: "Autopilot health state",
Units: "state",
Fam: "autopilot",
Ctx: "consul.autopilot_health_state",
Priority: prioAutopilotHealthState,
Dims: module.Dims{
{ID: "consul.autopilot.healthy.yes", Name: "healthy"},
{ID: "consul.autopilot.healthy.no", Name: "unhealthy"},
},
}
// chartAutopilotFailureTolerance charts the raw value of the
// "consul.autopilot.failure_tolerance" gauge, which collectAgentMetrics
// stores under the same key.
chartAutopilotFailureTolerance = module.Chart{
ID: "autopilot_failure_tolerance",
Title: "Autopilot failure tolerance",
Units: "servers",
Fam: "autopilot",
Ctx: "consul.autopilot_failure_tolerance",
Priority: prioAutopilotFailureTolerance,
Dims: module.Dims{
{ID: "consul.autopilot.failure_tolerance", Name: "tolerance"},
},
}

// chartMemoryAllocated charts the "consul.runtime.alloc_bytes" gauge as
// collected by collectAgentMetrics, unscaled, in bytes.
chartMemoryAllocated = module.Chart{
ID: "memory_allocated",
Title: "Memory allocated by the Consul process",
Units: "bytes",
Fam: "memory",
Ctx: "consul.memory_allocated",
Priority: prioMemoryAllocated,
Dims: module.Dims{
{ID: "consul.runtime.alloc_bytes", Name: "allocated"},
},
}
// chartMemorySys charts the "consul.runtime.sys_bytes" gauge as collected
// by collectAgentMetrics, unscaled, in bytes.
chartMemorySys = module.Chart{
ID: "memory_sys",
Title: "Memory obtained from the OS",
Units: "bytes",
Fam: "memory",
Ctx: "consul.memory_sys",
Priority: prioMemorySys,
Dims: module.Dims{
{ID: "consul.runtime.sys_bytes", Name: "sys"},
},
}

// chartGCPauseTime charts the "consul.runtime.total_gc_pause_ns" gauge.
// The source value is a nanosecond total, so the dimension divides by 1e9
// to match the chart's "seconds" unit.
// NOTE(review): module.Incremental presumably charts the change between
// collections rather than the running total — confirm in the module package.
chartGCPauseTime = module.Chart{
ID: "gc_pause_time",
Title: "Garbage collection stop-the-world pause time",
Units: "seconds",
Fam: "garbage collection",
Ctx: "consul.gc_pause_time",
Priority: prioGCPauseTime,
Dims: module.Dims{
{ID: "consul.runtime.total_gc_pause_ns", Name: "gc_pause", Algo: module.Incremental, Div: 1e9},
},
}

// chartClientRPCRequestsRate charts the "consul.client.rpc" count that
// collectAgentMetrics stores from the Counters (and Samples) sections.
// NOTE(review): module.Incremental presumably turns the running count into
// the per-second rate named in Units — confirm in the module package.
chartClientRPCRequestsRate = module.Chart{
ID: "client_rpc_requests_rate",
Title: "Client RPC requests",
Units: "requests/s",
Fam: "client rpc",
Ctx: "consul.client_rpc_requests_rate",
Priority: prioRPCRequests,
Dims: module.Dims{
{ID: "consul.client.rpc", Name: "rpc", Algo: module.Incremental},
},
}
)

var (
chartTmplServiceHealthCheckStatus = module.Chart{
ID: "health_check_%s_status",
Expand Down Expand Up @@ -44,7 +153,7 @@ var (
}
)

func newServiceHealthCheckChart(check *healthCheck) *module.Chart {
func newServiceHealthCheckChart(check *agentCheck) *module.Chart {
chart := chartTmplServiceHealthCheckStatus.Copy()
chart.ID = fmt.Sprintf(chart.ID, check.CheckID)
chart.Labels = []module.Label{
Expand All @@ -57,7 +166,7 @@ func newServiceHealthCheckChart(check *healthCheck) *module.Chart {
return chart
}

func newUnboundHealthCheckChart(check *healthCheck) *module.Chart {
func newUnboundHealthCheckChart(check *agentCheck) *module.Chart {
chart := chartTmplUnboundHealthCheckStatus.Copy()
chart.ID = fmt.Sprintf(chart.ID, check.CheckID)
chart.Labels = []module.Label{
Expand All @@ -69,7 +178,7 @@ func newUnboundHealthCheckChart(check *healthCheck) *module.Chart {
return chart
}

func (c *Consul) addHealthCheckCharts(check *healthCheck) {
func (c *Consul) addHealthCheckCharts(check *agentCheck) {
var chart *module.Chart

if check.ServiceName != "" {
Expand Down
14 changes: 13 additions & 1 deletion modules/consul/collect.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// SPDX-License-Identifier: GPL-3.0-or-later

package consul

import (
Expand All @@ -12,7 +14,10 @@ import (
func (c *Consul) collect() (map[string]int64, error) {
mx := make(map[string]int64)

if err := c.collectLocalChecks(mx); err != nil {
if err := c.collectAgentChecks(mx); err != nil {
return nil, err
}
if err := c.collectAgentMetrics(mx); err != nil {
return nil, err
}

Expand Down Expand Up @@ -54,3 +59,10 @@ func closeBody(resp *http.Response) {
_ = resp.Body.Close()
}
}

// boolToInt maps a boolean onto the integer form stored in the metrics map:
// true becomes 1 and false becomes 0.
func boolToInt(v bool) int64 {
	var n int64
	if v {
		n = 1
	}
	return n
}
20 changes: 7 additions & 13 deletions modules/consul/collect_checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

package consul

// urlPathChecks is the agent endpoint path that collectLocalChecks passes to
// doOKDecode to fetch the checks.
const urlPathChecks = "/v1/agent/checks"

type healthCheck struct {
type agentCheck struct {
Node string
CheckID string
Name string
Expand All @@ -14,10 +12,13 @@ type healthCheck struct {
ServiceTags []string
}

func (c *Consul) collectLocalChecks(mx map[string]int64) error {
var checks map[string]*healthCheck
// urlPathAgentChecks is the agent endpoint path that collectAgentChecks passes
// to doOKDecode; the response is decoded into a map of check entries.
// https://www.consul.io/api-docs/agent/check#list-checks
const urlPathAgentChecks = "/v1/agent/checks"

func (c *Consul) collectAgentChecks(mx map[string]int64) error {
var checks map[string]*agentCheck

if err := c.doOKDecode(urlPathChecks, &checks); err != nil {
if err := c.doOKDecode(urlPathAgentChecks, &checks); err != nil {
return err
}

Expand Down Expand Up @@ -47,10 +48,3 @@ func (c *Consul) collectLocalChecks(mx map[string]int64) error {

return nil
}

// boolToInt returns 1 when v is true and 0 otherwise, so a boolean state can
// be written into an int64 metrics map.
func boolToInt(v bool) int64 {
	if !v {
		return 0
	}
	return 1
}
65 changes: 65 additions & 0 deletions modules/consul/collect_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// SPDX-License-Identifier: GPL-3.0-or-later

package consul

// agentMetrics is the value collectAgentMetrics hands to doOKDecode to receive
// the /v1/agent/metrics response. In every section Name identifies the metric
// and Value/Count is the number copied into the metrics map; Labels is
// declared but not read by collectAgentMetrics.
// NOTE(review): all numeric fields are int64 — presumably a fractional number
// in the response would make decoding fail; confirm against doOKDecode.
type agentMetrics struct {
// Gauges carries point-in-time values (read via Value).
Gauges []struct {
Name string
Value int64
Labels map[string]string
}
// Counters carries event counts (read via Count).
Counters []struct {
Name string
Count int64
Labels map[string]string
}
// Samples carries sampled metrics; only Count is declared and read here.
Samples []struct {
Name string
Count int64
Labels map[string]string
}
}

// urlPathAgentMetrics is the agent endpoint path that collectAgentMetrics
// passes to doOKDecode to fetch the agent's metrics.
// https://www.consul.io/api-docs/agent#view-metrics
const urlPathAgentMetrics = "/v1/agent/metrics"

// collectAgentMetrics fetches urlPathAgentMetrics through doOKDecode and copies
// the metrics of interest into mx.
//
// Boolean-like gauges ("consul.server.isLeader", "consul.autopilot.healthy")
// are expanded into a "<name>.yes" / "<name>.no" pair, where "yes" is 1 only
// when the gauge value equals 1. The remaining selected gauges are stored
// as-is under their own name. The "consul.client.rpc" count is taken from the
// Counters section and, if present there too, from Samples (the later write
// wins). Metrics with any other name are ignored.
//
// The error from doOKDecode is returned unchanged; mx is not touched in that case.
func (c *Consul) collectAgentMetrics(mx map[string]int64) error {
	var resp agentMetrics
	if err := c.doOKDecode(urlPathAgentMetrics, &resp); err != nil {
		return err
	}

	for _, gauge := range resp.Gauges {
		switch gauge.Name {
		case "consul.server.isLeader", "consul.autopilot.healthy":
			// Exactly one of the two dimensions is set to 1.
			on := gauge.Value == 1
			mx[gauge.Name+".yes"] = boolToInt(on)
			mx[gauge.Name+".no"] = boolToInt(!on)
		case "consul.autopilot.failure_tolerance",
			"consul.runtime.alloc_bytes",
			"consul.runtime.sys_bytes",
			"consul.runtime.total_gc_pause_ns":
			mx[gauge.Name] = gauge.Value
		}
	}

	for _, counter := range resp.Counters {
		if counter.Name == "consul.client.rpc" {
			mx[counter.Name] = counter.Count
		}
	}

	for _, sample := range resp.Samples {
		if sample.Name == "consul.client.rpc" {
			mx[sample.Name] = sample.Count
		}
	}

	return nil
}
2 changes: 1 addition & 1 deletion modules/consul/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func New() *Consul {
},
},
checks: make(map[string]bool),
charts: &module.Charts{},
charts: globalCharts.Copy(),
}
}

Expand Down
47 changes: 37 additions & 10 deletions modules/consul/consul_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@
package consul

import (
"github.com/netdata/go.d.plugin/pkg/web"
"net/http"
"net/http/httptest"
"os"
"testing"

"github.com/netdata/go.d.plugin/pkg/web"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// Raw fixture payloads that the test HTTP handlers write back as responses.
// Read errors are discarded here; Test_testDataIsValid instead requires each
// payload to be non-nil.
var (
dataHealthChecks, _ = os.ReadFile("testdata/checks.txt")
dataAgentChecks, _ = os.ReadFile("testdata/checks.txt")
dataAgentMetrics, _ = os.ReadFile("testdata/metrics.json")
)

func Test_testDataIsValid(t *testing.T) {
for name, data := range map[string][]byte{
"dataHealthChecks": dataHealthChecks,
"dataAgentChecks": dataAgentChecks,
"dataAgentMetrics": dataAgentMetrics,
} {
require.NotNilf(t, data, name)
}
Expand Down Expand Up @@ -103,8 +106,17 @@ func TestConsul_Collect(t *testing.T) {
}{
"success on response from Consul": {
prepare: caseConsulResponse,
wantNumOfCharts: 4,
wantNumOfCharts: 11,
wantMetrics: map[string]int64{
"consul.autopilot.failure_tolerance": 0,
"consul.autopilot.healthy.no": 0,
"consul.autopilot.healthy.yes": 1,
"consul.client.rpc": 1,
"consul.runtime.alloc_bytes": 14164144,
"consul.runtime.sys_bytes": 34685960,
"consul.runtime.total_gc_pause_ns": 9367254,
"consul.server.isLeader.no": 0,
"consul.server.isLeader.yes": 1,
"health_check_chk1_critical_status": 0,
"health_check_chk1_maintenance_status": 0,
"health_check_chk1_passing_status": 1,
Expand All @@ -125,8 +137,17 @@ func TestConsul_Collect(t *testing.T) {
},
"success on response from Consul with filtered checks": {
prepare: caseConsulResponseWithFilteredChecks,
wantNumOfCharts: 1,
wantNumOfCharts: 8,
wantMetrics: map[string]int64{
"consul.autopilot.failure_tolerance": 0,
"consul.autopilot.healthy.no": 0,
"consul.autopilot.healthy.yes": 1,
"consul.client.rpc": 1,
"consul.runtime.alloc_bytes": 14164144,
"consul.runtime.sys_bytes": 34685960,
"consul.runtime.total_gc_pause_ns": 9367254,
"consul.server.isLeader.no": 0,
"consul.server.isLeader.yes": 1,
"health_check_mysql_critical_status": 1,
"health_check_mysql_maintenance_status": 0,
"health_check_mysql_passing_status": 0,
Expand Down Expand Up @@ -158,7 +179,9 @@ func TestConsul_Collect(t *testing.T) {
mx := consul.Collect()

require.Equal(t, test.wantMetrics, mx)
assert.Equal(t, test.wantNumOfCharts, len(*consul.Charts()))
if len(test.wantMetrics) > 0 {
assert.Equal(t, test.wantNumOfCharts, len(*consul.Charts()))
}
})
}
}
Expand All @@ -168,8 +191,10 @@ func caseConsulResponse(t *testing.T) (*Consul, func()) {
srv := httptest.NewServer(http.HandlerFunc(
func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case urlPathChecks:
_, _ = w.Write(dataHealthChecks)
case urlPathAgentChecks:
_, _ = w.Write(dataAgentChecks)
case urlPathAgentMetrics:
_, _ = w.Write(dataAgentMetrics)
default:
w.WriteHeader(http.StatusNotFound)
}
Expand All @@ -188,8 +213,10 @@ func caseConsulResponseWithFilteredChecks(t *testing.T) (*Consul, func()) {
srv := httptest.NewServer(http.HandlerFunc(
func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case urlPathChecks:
_, _ = w.Write(dataHealthChecks)
case urlPathAgentChecks:
_, _ = w.Write(dataAgentChecks)
case urlPathAgentMetrics:
_, _ = w.Write(dataAgentMetrics)
default:
w.WriteHeader(http.StatusNotFound)
}
Expand Down
Loading