-
Notifications
You must be signed in to change notification settings - Fork 3.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
server: fix broken /health #45119
server: fix broken /health #45119
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ import ( | |
"github.com/cockroachdb/cockroach/pkg/security" | ||
"github.com/cockroachdb/cockroach/pkg/server/debug" | ||
"github.com/cockroachdb/cockroach/pkg/server/serverpb" | ||
"github.com/cockroachdb/cockroach/pkg/server/telemetry" | ||
"github.com/cockroachdb/cockroach/pkg/settings" | ||
"github.com/cockroachdb/cockroach/pkg/settings/cluster" | ||
"github.com/cockroachdb/cockroach/pkg/sql" | ||
|
@@ -1294,19 +1295,63 @@ func (s *adminServer) Cluster( | |
|
||
// Health returns liveness for the node target of the request. | ||
// | ||
// NB: this is deprecated. If you're looking for the code serving /health, | ||
// you want (*statusServer).Details. | ||
// See the docstring for HealthRequest for more details about | ||
// what this function precisely reports. | ||
// | ||
// Note: Health is non-privileged and non-authenticated and thus | ||
// must not report privileged information. | ||
func (s *adminServer) Health( | ||
ctx context.Context, req *serverpb.HealthRequest, | ||
) (*serverpb.HealthResponse, error) { | ||
isLive, err := s.server.nodeLiveness.IsLive(s.server.NodeID()) | ||
telemetry.Inc(telemetryHealthCheck) | ||
|
||
resp := &serverpb.HealthResponse{} | ||
// If Ready is not set, the client doesn't want to know whether this node is | ||
// ready to receive client traffic. | ||
if !req.Ready { | ||
return resp, nil | ||
} | ||
|
||
if err := s.checkReadinessForHealthCheck(); err != nil { | ||
return nil, err | ||
} | ||
return resp, nil | ||
} | ||
|
||
func (s *adminServer) checkReadinessForHealthCheck() error { | ||
serveMode := s.server.grpc.mode.get() | ||
switch serveMode { | ||
case modeInitializing: | ||
return status.Error(codes.Unavailable, "node is waiting for cluster initialization") | ||
case modeDraining: | ||
// grpc.mode is set to modeDraining when the Drain(DrainMode_CLIENT) has | ||
// been called (client connections are to be drained). | ||
return status.Errorf(codes.Unavailable, "node is shutting down") | ||
case modeOperational: | ||
break | ||
default: | ||
return s.serverError(errors.Newf("unknown mode: %v", serveMode)) | ||
} | ||
|
||
// TODO(knz): update this code when progress is made on | ||
// https://github.com/cockroachdb/cockroach/issues/45123 | ||
l, err := s.server.nodeLiveness.GetLiveness(s.server.NodeID()) | ||
if err != nil { | ||
return nil, status.Errorf(codes.Internal, err.Error()) | ||
return s.serverError(err) | ||
} | ||
if !isLive { | ||
return nil, status.Errorf(codes.Unavailable, "node is not live") | ||
if !l.IsLive(s.server.clock.Now().GoTime()) { | ||
return status.Errorf(codes.Unavailable, "node is not healthy") | ||
} | ||
return &serverpb.HealthResponse{}, nil | ||
if l.Draining { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the problem with just using |
||
// l.Draining indicates that the node is draining leases. | ||
// This is set when Drain(DrainMode_LEASES) is called. | ||
// It's possible that l.Draining is set without | ||
// grpc.mode being modeDraining, if a RPC client | ||
// has requested DrainMode_LEASES but not DrainMode_CLIENT. | ||
return status.Errorf(codes.Unavailable, "node is shutting down") | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// getLivenessStatusMap generates a map from NodeID to LivenessStatus for all | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See #45123, I think you want to use
storage.LivenessStatus(.)
(and no this should not just sit around instorage
like that at all, but there it is).