Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

http health check bug fixes #16697

Merged
merged 1 commit into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions server/etcdserver/api/etcdhttp/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,25 @@ type ServerHealth interface {
Leader() types.ID
Range(context.Context, *pb.RangeRequest) (*pb.RangeResponse, error)
Config() config.ServerConfig
AuthStore() auth.AuthStore
}

// HandleHealth registers metrics and health handlers. it checks health by using v3 range request
// and its corresponding timeout.
func HandleHealth(lg *zap.Logger, mux *http.ServeMux, srv ServerHealth) {
mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health {
mux.Handle(PathHealth, NewHealthHandler(lg, func(ctx context.Context, excludedAlarms AlarmSet, serializable bool) Health {
if h := checkAlarms(lg, srv, excludedAlarms); h.Health != "true" {
return h
}
if h := checkLeader(lg, srv, serializable); h.Health != "true" {
return h
}
return checkAPI(lg, srv, serializable)
return checkAPI(ctx, lg, srv, serializable)
}))
}

// NewHealthHandler handles '/health' requests.
func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc {
func NewHealthHandler(lg *zap.Logger, hfunc func(ctx context.Context, excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
w.Header().Set("Allow", http.MethodGet)
Expand All @@ -71,7 +72,7 @@ func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serial
// This is useful for probes attempting to validate the liveness of
// the etcd process vs readiness of the cluster to serve requests.
serializableFlag := getSerializableFlag(r)
h := hfunc(excludedAlarms, serializableFlag)
h := hfunc(r.Context(), excludedAlarms, serializableFlag)
defer func() {
if h.Health == "true" {
healthSuccess.Inc()
Expand Down Expand Up @@ -178,13 +179,14 @@ func checkLeader(lg *zap.Logger, srv ServerHealth, serializable bool) Health {
return h
}

func checkAPI(lg *zap.Logger, srv ServerHealth, serializable bool) Health {
func checkAPI(ctx context.Context, lg *zap.Logger, srv ServerHealth, serializable bool) Health {
h := Health{Health: "true"}
cfg := srv.Config()
ctx, cancel := context.WithTimeout(context.Background(), cfg.ReqTimeout())
_, err := srv.Range(ctx, &pb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable})
ctx = srv.AuthStore().WithRoot(ctx)
chaochn47 marked this conversation as resolved.
Show resolved Hide resolved
cctx, cancel := context.WithTimeout(ctx, cfg.ReqTimeout())
_, err := srv.Range(cctx, &pb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable})
cancel()
if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied {
if err != nil {
h.Health = "false"
h.Reason = fmt.Sprintf("RANGE ERROR:%s", err)
lg.Warn("serving /health false; Range fails", zap.Error(err))
Expand Down
33 changes: 14 additions & 19 deletions server/etcdserver/api/etcdhttp/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,26 @@ import (

"go.uber.org/zap/zaptest"

"go.etcd.io/raft/v3"

pb "go.etcd.io/etcd/api/v3/etcdserverpb"
"go.etcd.io/etcd/client/pkg/v3/testutil"
"go.etcd.io/etcd/client/pkg/v3/types"
"go.etcd.io/etcd/server/v3/auth"
"go.etcd.io/etcd/server/v3/config"
"go.etcd.io/etcd/server/v3/etcdserver"
"go.etcd.io/raft/v3"
betesting "go.etcd.io/etcd/server/v3/storage/backend/testing"
"go.etcd.io/etcd/server/v3/storage/schema"
)

type fakeHealthServer struct {
fakeServer
health string
apiError error
health string
apiError error
authStore auth.AuthStore
}

func (s *fakeHealthServer) Range(ctx context.Context, request *pb.RangeRequest) (*pb.RangeResponse, error) {
func (s *fakeHealthServer) Range(_ context.Context, _ *pb.RangeRequest) (*pb.RangeResponse, error) {
return nil, s.apiError
}

Expand All @@ -54,12 +58,13 @@ func (s *fakeHealthServer) Leader() types.ID {
}
return types.ID(raft.None)
}
func (s *fakeHealthServer) Do(ctx context.Context, r pb.Request) (etcdserver.Response, error) {
func (s *fakeHealthServer) Do(_ context.Context, _ pb.Request) (etcdserver.Response, error) {
if s.health == "true" {
return etcdserver.Response{}, nil
}
return etcdserver.Response{}, fmt.Errorf("fail health check")
}
func (s *fakeHealthServer) AuthStore() auth.AuthStore { return s.authStore }
func (s *fakeHealthServer) ClientCertAuthEnabled() bool { return false }

func TestHealthHandler(t *testing.T) {
Expand Down Expand Up @@ -123,20 +128,6 @@ func TestHealthHandler(t *testing.T) {
expectStatusCode: http.StatusOK,
expectHealth: "true",
},
{
name: "Healthy even if authentication failed",
healthCheckURL: "/health",
apiError: auth.ErrUserEmpty,
expectStatusCode: http.StatusOK,
expectHealth: "true",
},
{
name: "Healthy even if authorization failed",
healthCheckURL: "/health",
apiError: auth.ErrPermissionDenied,
expectStatusCode: http.StatusOK,
expectHealth: "true",
},
{
name: "Unhealthy if api is not available",
healthCheckURL: "/health",
Expand All @@ -149,10 +140,14 @@ func TestHealthHandler(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
mux := http.NewServeMux()
lg := zaptest.NewLogger(t)
be, _ := betesting.NewDefaultTmpBackend(t)
chaochn47 marked this conversation as resolved.
Show resolved Hide resolved
defer betesting.Close(t, be)
HandleHealth(zaptest.NewLogger(t), mux, &fakeHealthServer{
fakeServer: fakeServer{alarms: tt.alarms},
health: tt.expectHealth,
apiError: tt.apiError,
authStore: auth.NewAuthStore(lg, schema.NewAuthBackend(lg, be), nil, 0),
})
ts := httptest.NewServer(mux)
defer ts.Close()
Expand Down
8 changes: 6 additions & 2 deletions server/proxy/grpcproxy/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,19 @@ func HandleHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) {
if lg == nil {
lg = zap.NewNop()
}
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkHealth(c) }))
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(ctx context.Context, excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health {
return checkHealth(c)
}))
}

// HandleProxyHealth registers health handler on '/proxy/health'.
func HandleProxyHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) {
if lg == nil {
lg = zap.NewNop()
}
mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkProxyHealth(c) }))
mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(ctx context.Context, excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health {
return checkProxyHealth(c)
}))
}

func checkHealth(c *clientv3.Client) etcdhttp.Health {
Expand Down
Loading