From 49d001db6169501903f6b6e682068137ae1a55e4 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Mon, 27 Mar 2023 14:24:40 +0200 Subject: [PATCH 1/2] Revert #98150 This reverts #98150 because we think it introduced a problem that was detected via the `replicate/wide` roachtest[^1]. It seems that for reasons still unknown we do rely on the periodic gossip trigger on liveness lease extensions. [^1]: https://github.com/cockroachdb/cockroach/issues/99268#issuecomment-1484968745 Touches #99268. Touches #97966. Closes #99268. Touches #98945. Epic: none Release note: None --- pkg/kv/kvserver/replica_proposal.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index d1496ff42d05..bcbab1929141 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -424,8 +424,7 @@ func (r *Replica) leasePostApplyLocked( // this gossip if we're taking over after a leaseholder failure. In both cases, // incremental liveness updates may have been lost, so we want to make sure that // the latest view of node liveness ends up in gossip. - nls := keys.NodeLivenessSpan - if leaseChangingHands && iAmTheLeaseHolder && kvserverbase.ContainsKeyRange(r.descRLocked(), nls.Key, nls.EndKey) { + if leaseChangingHands && iAmTheLeaseHolder { // NB: run these in an async task to keep them out of the critical section // (r.mu is held here). _ = r.store.stopper.RunAsyncTask(r.AnnotateCtx(context.Background()), "lease-triggers", func(ctx context.Context) { From c46b8a65924027d5689d5fcde2958c107f8868a9 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Mon, 27 Mar 2023 14:27:09 +0200 Subject: [PATCH 2/2] Revert #98150 (2/2) Epic: none Release note: None --- pkg/kv/kvserver/replica_proposal.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index bcbab1929141..a8cd8e100a60 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -419,12 +419,14 @@ func (r *Replica) leasePostApplyLocked( } } - // Potentially re-gossip if the range contains node liveness data. We need to - // perform this gossip at startup as soon as possible. We also need to perform - // this gossip if we're taking over after a leaseholder failure. In both cases, - // incremental liveness updates may have been lost, so we want to make sure that - // the latest view of node liveness ends up in gossip. - if leaseChangingHands && iAmTheLeaseHolder { + // Potentially re-gossip if the range contains system data (e.g. system + // config or node liveness). We need to perform this gossip at startup as + // soon as possible. Trying to minimize how often we gossip is a fool's + // errand. The node liveness info will be gossiped frequently (every few + // seconds) in any case due to the liveness heartbeats. And the system config + // will be gossiped rarely because it falls on a range with an epoch-based + // range lease that is only reacquired extremely infrequently. + if iAmTheLeaseHolder { // NB: run these in an async task to keep them out of the critical section // (r.mu is held here). _ = r.store.stopper.RunAsyncTask(r.AnnotateCtx(context.Background()), "lease-triggers", func(ctx context.Context) {