Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filterable Health Tags #1304

Merged
merged 13 commits into from
Apr 10, 2023
24 changes: 22 additions & 2 deletions api/health/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
package health

import (
"fmt"
"net/http"

stdjson "encoding/json"

"github.com/gorilla/rpc/v2"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/utils/json"
"github.com/ava-labs/avalanchego/utils/logging"
)
Expand Down Expand Up @@ -45,14 +47,32 @@ func NewGetAndPostHandler(log logging.Logger, reporter Reporter) (http.Handler,
return handler, err
}

// errorMsg is the JSON body written back to the client when a request is
// rejected; it serializes as {"error": "<message>"}.
type errorMsg struct {
	// Error holds the human-readable description of what went wrong.
	Error string `json:"error"`
}

// NewGetHandler returns a health handler that supports GET requests reporting
// the result of the provided [reporter].
func NewGetHandler(reporter func() (map[string]Result, bool)) http.Handler {
func NewGetHandler(reporter func(tags ...string) (map[string]Result, bool)) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Make sure the content type is set before writing the header.
w.Header().Set("Content-Type", "application/json")

checks, healthy := reporter()
subnetIDs := r.URL.Query()["subnetID"]
danlaine marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kinda weird that the key is subnetID and it returns subnetIDs... But I do think this is the best way to do this with the query params...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, and it's even weirder that r.URL.Query().Get("subnetID") returns only the first value for the key... Alternatively, we could use the plural subnetIDs (now tags) and split the string value by commas, but I would probably prefer singular key names (i.e. tag over tags).


// check if the subnetID is a valid ID
for _, subnetID := range subnetIDs {
if _, err := ids.FromString(subnetID); err != nil {
w.WriteHeader(http.StatusBadRequest)
err := fmt.Errorf("invalid subnetID %s: %w", subnetID, err)
danlaine marked this conversation as resolved.
Show resolved Hide resolved
_ = stdjson.NewEncoder(w).Encode(errorMsg{
Error: err.Error(),
})
return
}
}
danlaine marked this conversation as resolved.
Show resolved Hide resolved

checks, healthy := reporter(subnetIDs...)
if !healthy {
// If a health check has failed, we should return a 503.
w.WriteHeader(http.StatusServiceUnavailable)
Expand Down
38 changes: 20 additions & 18 deletions api/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (

var _ Health = (*health)(nil)

// GlobalTag is the tag string "global".
// NOTE(review): presumably checks carrying this tag are meant to be reported
// regardless of which tags a caller filters on — confirm against the code that
// consumes this constant, which is not visible in this hunk.
const GlobalTag = "global"
danlaine marked this conversation as resolved.
Show resolved Hide resolved

// Health defines the full health service interface for registering, reporting
// and refreshing health checks.
type Health interface {
Expand All @@ -28,16 +30,16 @@ type Health interface {

// Registerer defines how to register new components to check the health of.
type Registerer interface {
RegisterReadinessCheck(name string, checker Checker) error
RegisterHealthCheck(name string, checker Checker) error
RegisterLivenessCheck(name string, checker Checker) error
RegisterReadinessCheck(name string, checker Checker, tags ...string) error
RegisterHealthCheck(name string, checker Checker, tags ...string) error
RegisterLivenessCheck(name string, checker Checker, tags ...string) error
}

// Reporter returns the current health status.
type Reporter interface {
Readiness() (map[string]Result, bool)
Health() (map[string]Result, bool)
Liveness() (map[string]Result, bool)
Readiness(tags ...string) (map[string]Result, bool)
Health(tags ...string) (map[string]Result, bool)
Liveness(tags ...string) (map[string]Result, bool)
}

type health struct {
Expand Down Expand Up @@ -67,20 +69,20 @@ func New(log logging.Logger, registerer prometheus.Registerer) (Health, error) {
}, err
}

func (h *health) RegisterReadinessCheck(name string, checker Checker) error {
return h.readiness.RegisterMonotonicCheck(name, checker)
func (h *health) RegisterReadinessCheck(name string, checker Checker, tags ...string) error {
return h.readiness.RegisterMonotonicCheck(name, checker, tags...)
}

func (h *health) RegisterHealthCheck(name string, checker Checker) error {
return h.health.RegisterCheck(name, checker)
func (h *health) RegisterHealthCheck(name string, checker Checker, tags ...string) error {
return h.health.RegisterCheck(name, checker, tags...)
}

func (h *health) RegisterLivenessCheck(name string, checker Checker) error {
return h.liveness.RegisterCheck(name, checker)
func (h *health) RegisterLivenessCheck(name string, checker Checker, tags ...string) error {
return h.liveness.RegisterCheck(name, checker, tags...)
}

func (h *health) Readiness() (map[string]Result, bool) {
results, healthy := h.readiness.Results()
func (h *health) Readiness(tags ...string) (map[string]Result, bool) {
results, healthy := h.readiness.Results(tags...)
if !healthy {
h.log.Warn("failing readiness check",
zap.Reflect("reason", results),
Expand All @@ -89,8 +91,8 @@ func (h *health) Readiness() (map[string]Result, bool) {
return results, healthy
}

func (h *health) Health() (map[string]Result, bool) {
results, healthy := h.health.Results()
func (h *health) Health(tags ...string) (map[string]Result, bool) {
results, healthy := h.health.Results(tags...)
if !healthy {
h.log.Warn("failing health check",
zap.Reflect("reason", results),
Expand All @@ -99,8 +101,8 @@ func (h *health) Health() (map[string]Result, bool) {
return results, healthy
}

func (h *health) Liveness() (map[string]Result, bool) {
results, healthy := h.liveness.Results()
func (h *health) Liveness(tags ...string) (map[string]Result, bool) {
results, healthy := h.liveness.Results(tags...)
if !healthy {
h.log.Warn("failing liveness check",
zap.Reflect("reason", results),
Expand Down
76 changes: 76 additions & 0 deletions api/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,79 @@ func TestDeadlockRegression(t *testing.T) {

awaitHealthy(h, true)
}

func TestTags(t *testing.T) {
require := require.New(t)

check := CheckerFunc(func(context.Context) (interface{}, error) {
return "", nil
})

h, err := New(logging.NoLog{}, prometheus.NewRegistry())
require.NoError(err)
err = h.RegisterHealthCheck("check1", check)
require.NoError(err)
err = h.RegisterHealthCheck("check2", check, "tag1")
require.NoError(err)
err = h.RegisterHealthCheck("check3", check, "tag2")
require.NoError(err)
err = h.RegisterHealthCheck("check4", check, "tag1", "tag2")
require.NoError(err)

// default checks
{
healthResult, health := h.Health()
require.Len(healthResult, 4)
require.Contains(healthResult, "check1")
require.Contains(healthResult, "check2")
require.Contains(healthResult, "check3")
require.Contains(healthResult, "check4")
require.False(health)

healthResult, health = h.Health("tag1")
require.Len(healthResult, 2)
require.Contains(healthResult, "check2")
require.Contains(healthResult, "check4")
require.False(health)
}
danlaine marked this conversation as resolved.
Show resolved Hide resolved

h.Start(context.Background(), checkFreq)
defer h.Stop()

awaitHealthy(h, true)

{
healthResult, health := h.Health()
require.Len(healthResult, 4)
require.Contains(healthResult, "check1")
require.Contains(healthResult, "check2")
require.Contains(healthResult, "check3")
require.Contains(healthResult, "check4")
require.True(health)

healthResult, health = h.Health("tag1")
require.Len(healthResult, 2)
require.Contains(healthResult, "check2")
require.Contains(healthResult, "check4")
require.True(health)
}

// now we'll add a new failing check
danlaine marked this conversation as resolved.
Show resolved Hide resolved
{
err = h.RegisterHealthCheck("check5", check, "tag1")
require.NoError(err)

healthResult, health := h.Health("tag1")
require.Len(healthResult, 3)
require.Contains(healthResult, "check2")
require.Contains(healthResult, "check4")
require.Contains(healthResult, "check5")
require.False(health)

healthResult, health = h.Health("tag2")
require.Len(healthResult, 2)
require.Contains(healthResult, "check3")
require.Contains(healthResult, "check4")
require.True(health)
}
}
21 changes: 19 additions & 2 deletions api/health/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (

"go.uber.org/zap"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/utils/logging"
)

Expand All @@ -22,6 +23,10 @@ type APIReply struct {
Healthy bool `json:"healthy"`
}

type HealthArgs struct {
SubnetIDs []ids.ID `json:"subnetIDs"`
danlaine marked this conversation as resolved.
Show resolved Hide resolved
}

// Readiness returns if the node has finished initialization
func (s *Service) Readiness(_ *http.Request, _ *struct{}, reply *APIReply) error {
danlaine marked this conversation as resolved.
Show resolved Hide resolved
s.log.Debug("API called",
Expand All @@ -33,12 +38,24 @@ func (s *Service) Readiness(_ *http.Request, _ *struct{}, reply *APIReply) error
}

// Health returns a summation of the health of the node
func (s *Service) Health(_ *http.Request, _ *struct{}, reply *APIReply) error {
func (s *Service) Health(_ *http.Request, args *HealthArgs, reply *APIReply) error {
danlaine marked this conversation as resolved.
Show resolved Hide resolved
s.log.Debug("API called",
danlaine marked this conversation as resolved.
Show resolved Hide resolved
zap.String("service", "health"),
zap.String("method", "health"),
)
reply.Checks, reply.Healthy = s.health.Health()

if args == nil || len(args.SubnetIDs) == 0 {
danlaine marked this conversation as resolved.
Show resolved Hide resolved
danlaine marked this conversation as resolved.
Show resolved Hide resolved
reply.Checks, reply.Healthy = s.health.Health()
return nil
}

// convert subnetIDs to string tags
tags := make([]string, len(args.SubnetIDs))
for i, subnetID := range args.SubnetIDs {
tags[i] = subnetID.String()
}

reply.Checks, reply.Healthy = s.health.Health(tags...)
return nil
}

Expand Down
81 changes: 81 additions & 0 deletions api/health/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/stretchr/testify/require"

"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/utils/logging"
)

Expand Down Expand Up @@ -112,3 +113,83 @@ func TestServiceResponses(t *testing.T) {
require.True(reply.Healthy)
}
}

func TestServiceTagResponse(t *testing.T) {
require := require.New(t)

check := CheckerFunc(func(context.Context) (interface{}, error) {
return "", nil
})

subnetID1 := ids.GenerateTestID()
subnetID2 := ids.GenerateTestID()

h, err := New(logging.NoLog{}, prometheus.NewRegistry())
require.NoError(err)
err = h.RegisterHealthCheck("check1", check)
require.NoError(err)
err = h.RegisterHealthCheck("check2", check, subnetID1.String())
require.NoError(err)
err = h.RegisterHealthCheck("check3", check, subnetID2.String())
require.NoError(err)
err = h.RegisterHealthCheck("check4", check, subnetID1.String(), subnetID2.String())
require.NoError(err)

s := &Service{
log: logging.NoLog{},
health: h,
}

// default checks
{
reply := APIReply{}
err = s.Health(nil, nil, &reply)
require.NoError(err)
require.Len(reply.Checks, 4)
require.Contains(reply.Checks, "check1")
require.Contains(reply.Checks, "check2")
require.Contains(reply.Checks, "check3")
require.Contains(reply.Checks, "check4")
require.Equal(notYetRunResult, reply.Checks["check1"])
require.False(reply.Healthy)

err = s.Health(nil, &HealthArgs{SubnetIDs: []ids.ID{subnetID1}}, &reply)
require.NoError(err)
require.Len(reply.Checks, 2)
require.Contains(reply.Checks, "check2")
require.Contains(reply.Checks, "check4")
require.Equal(notYetRunResult, reply.Checks["check2"])
require.False(reply.Healthy)
}

h.Start(context.Background(), checkFreq)
defer h.Stop()

awaitHealthy(h, true)

{
reply := APIReply{}
err = s.Health(nil, &HealthArgs{SubnetIDs: []ids.ID{subnetID1}}, &reply)
require.NoError(err)
require.Len(reply.Checks, 2)
require.Contains(reply.Checks, "check2")
require.Contains(reply.Checks, "check4")
require.True(reply.Healthy)
}

// now we'll add a new failing check
danlaine marked this conversation as resolved.
Show resolved Hide resolved
{
err = h.RegisterHealthCheck("check5", check, subnetID1.String())
require.NoError(err)

reply := APIReply{}
err = s.Health(nil, &HealthArgs{SubnetIDs: []ids.ID{subnetID1}}, &reply)
require.NoError(err)
require.Len(reply.Checks, 3)
require.Contains(reply.Checks, "check2")
require.Contains(reply.Checks, "check4")
require.Contains(reply.Checks, "check5")
require.Equal(notYetRunResult, reply.Checks["check5"])
require.False(reply.Healthy)
}
}
Loading