Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server: always create a liveness record before starting up #53805

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions pkg/cmd/roachtest/decommission.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,21 +309,6 @@ func runDecommissionRandomized(ctx context.Context, t *test, c *cluster) {
Multiplier: 2,
}

// This is a pretty gross hack to let the bootstrap info (cluster ID,
// liveness records) disseminate through the cluster. Since it's no longer
// happening through gossip, it takes a bit longer to happen. We should do
// two things to improve our story here:
//
// - We should opportunistically write to the liveness table when adding a
// node through the Join RPC. This would also simplify the handling of
// empty liveness records (they would no longer exist).
// - We should add roachtest helpers that wait until each node has received
// cluster ID information, and use it in all the tests that need it (which
// may very well be all the tests).
//
// TODO(irfansharif): Do the above.
time.Sleep(30 * time.Second)

// Partially decommission then recommission a random node, from another
// random node. Run a couple of status checks while doing so.
{
Expand Down
14 changes: 4 additions & 10 deletions pkg/kv/kvserver/node_liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -455,16 +455,10 @@ func (nl *NodeLiveness) setMembershipStatusInternal(
if oldLivenessRec.Liveness == (kvserverpb.Liveness{}) {
// Liveness record didn't previously exist, so we create one.
//
// TODO(irfansharif): This code feels a bit unwieldy because it's
// possible for a liveness record to not exist previously. It is just
// generally difficult to write it at startup. When a node joins the
// cluster, this completes before it has had a chance to write its
// liveness record. If it gets decommissioned immediately, there won't
// be one yet. The Connect RPC can solve this though, I think? We can
// bootstrap clusters with a liveness record for n1. Any other node at
// some point has to join the cluster for the first time via the Connect
// RPC, which as part of its job can make sure the liveness record
// exists before responding to the new node.
// TODO(irfansharif): The above is now no longer possible. We always
// create a liveness record before fully starting up the node. We should
// clean up all this logic that tries to work around the possibility of
// the liveness record not existing.
newLiveness = kvserverpb.Liveness{
NodeID: nodeID,
Epoch: 1,
Expand Down
26 changes: 21 additions & 5 deletions pkg/server/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,8 @@ func (s *initServer) needsInitLocked() bool {
// necessarily all. This is fine, since initializing additional stores later is
// easy.
//
// `initialBoot` is true if this is a new node. This flag should only be used
// for logging and reporting. A newly bootstrapped single-node cluster is
// functionally equivalent to one that restarted; any decisions should be made
// on persisted data instead of this flag.
// `initialStart` is true if this is a new node (i.e. it was either just
// bootstrapped, or it just joined an existing cluster for the first time).
//
// [1]: In mixed version clusters it waits until Gossip connects (but this is
// slated to be removed in 21.1).
Expand All @@ -191,7 +189,7 @@ func (s *initServer) ServeAndWait(
stopper *stop.Stopper,
sv *settings.Values,
startGossipFn func() *gossip.Gossip,
) (state *initState, initialBoot bool, err error) {
) (state *initState, initialStart bool, err error) {
// If already bootstrapped, return early.
s.mu.Lock()
if !s.needsInitLocked() {
Expand Down Expand Up @@ -390,6 +388,24 @@ func (s *initServer) startJoinLoop(ctx context.Context, stopper *stop.Stopper) e
return ErrJoinRPCUnsupported
}

// Busy-loop through all the resolvers at least once. Keep this code block
// roughly in sync with the one below.
for _, res := range s.config.resolvers {
addr := res.Addr()
err := s.attemptJoin(ctx, addr)
if err == nil {
return nil
}

if errors.Is(err, ErrJoinRPCUnsupported) || errors.Is(err, ErrIncompatibleBinaryVersion) {
// Propagate upwards; these are error conditions the caller knows to
// expect.
return err
}

// Ignore all other errors, they'll be better dealt with below.
}

const joinRPCBackoff = time.Second
var tickChan <-chan time.Time
{
Expand Down
4 changes: 0 additions & 4 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1122,10 +1122,6 @@ func (n *Node) GossipSubscription(
// Join implements the roachpb.InternalServer service. This is the
// "connectivity" API; individual CRDB servers are passed in a --join list and
// the join targets are addressed through this API.
//
// TODO(irfansharif): Perhaps we could opportunistically create a liveness
// record here so as to no longer have to worry about the liveness record not
// existing for a given node.
func (n *Node) Join(
ctx context.Context, req *roachpb.JoinNodeRequest,
) (*roachpb.JoinNodeResponse, error) {
Expand Down
46 changes: 29 additions & 17 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1402,11 +1402,9 @@ func (s *Server) Start(ctx context.Context) error {
// one, make sure it's the clusterID we already know (and are guaranteed to
// know) at this point. If it's not the same, explode.
//
// TODO(tbg): remove this when we have changed ServeAndWait() to join an
// existing cluster via a one-off RPC, at which point we can create gossip
// (and thus the RPC layer) only after the clusterID is already known. We
// can then rely on the RPC layer's protection against cross-cluster
// communication.
// TODO(irfansharif): The above is no longer applicable; in 21.1 we can
// always assume that the RPC layer will always get set up after having
// found out what the cluster ID is. The checks below can be removed then.
{
// We populated this above, so it should still be set. This is just to
// demonstrate that we're not doing anything functional here (and to
Expand Down Expand Up @@ -1538,6 +1536,32 @@ func (s *Server) Start(ctx context.Context) error {
}
})

// Begin the node liveness heartbeat. Add a callback which records the
// local store "last up" timestamp for every store whenever the liveness
// record is updated. We're sure to do this before we open RPC
// floodgates.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not guaranteed since it's async, so add that it's best effort

var livenessOnce sync.Once
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: don't leak livenessOnce into main scope, do

livenessRecordCreated := make(chan struct{}, 1)
{
  // all the other stuff
}
if initialStart { ... }

livenessRecordCreated := make(chan struct{}, 1)
s.nodeLiveness.StartHeartbeat(ctx, s.stopper, s.engines, func(ctx context.Context) {
now := s.clock.Now()
if err := s.node.stores.VisitStores(func(s *kvserver.Store) error {
return s.WriteLastUpTimestamp(ctx, now)
}); err != nil {
log.Warningf(ctx, "writing last up timestamp: %v", err)
}
livenessOnce.Do(func() {
livenessRecordCreated <- struct{}{}
})
})

if initialStart {
// If we're a new node being added, we're going to wait for the
// liveness record to be created before allowing all RPCs below.
<-livenessRecordCreated
}

// Allow all RPCs, this server can now be considered to be fully
// initialized.
s.grpc.setMode(modeOperational)

log.Infof(ctx, "starting %s server at %s (use: %s)",
Expand All @@ -1552,18 +1576,6 @@ func (s *Server) Start(ctx context.Context) error {

log.Event(ctx, "accepting connections")

// Begin the node liveness heartbeat. Add a callback which records the local
// store "last up" timestamp for every store whenever the liveness record is
// updated.
s.nodeLiveness.StartHeartbeat(ctx, s.stopper, s.engines, func(ctx context.Context) {
now := s.clock.Now()
if err := s.node.stores.VisitStores(func(s *kvserver.Store) error {
return s.WriteLastUpTimestamp(ctx, now)
}); err != nil {
log.Warningf(ctx, "writing last up timestamp: %v", err)
}
})

// Begin recording status summaries.
s.node.startWriteNodeStatus(base.DefaultMetricsSampleInterval)

Expand Down