From 025c6609f7f7906e28853dd32cbd5ce05ed6df32 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Mon, 27 Mar 2023 12:24:40 +0000 Subject: [PATCH 1/2] Revert #98150 This reverts #98150 because we think it introduced a problem that was detected via the `replicate/wide` roachtest[^1]. It seems that for reasons still unknown we do rely on the periodic gossip trigger on liveness lease extensions. [^1]: https://github.com/cockroachdb/cockroach/issues/99268#issuecomment-1484968745 Touches #99268. Touches #97966. Closes #99268. Touches #98945. Epic: none Release note: None --- pkg/kv/kvserver/replica_proposal.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index 5f8548109c45..45565c2300e3 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -423,8 +423,7 @@ func (r *Replica) leasePostApplyLocked( // this gossip if we're taking over after a leaseholder failure. In both cases, // incremental liveness updates may have been lost, so we want to make sure that // the latest view of node liveness ends up in gossip. - nls := keys.NodeLivenessSpan - if leaseChangingHands && iAmTheLeaseHolder && kvserverbase.ContainsKeyRange(r.descRLocked(), nls.Key, nls.EndKey) { + if leaseChangingHands && iAmTheLeaseHolder { // NB: run these in an async task to keep them out of the critical section // (r.mu is held here). _ = r.store.stopper.RunAsyncTask(r.AnnotateCtx(context.Background()), "lease-triggers", func(ctx context.Context) { From 887be4dcbdd7268d5773b148d7385fa5f799b16b Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Mon, 27 Mar 2023 12:27:09 +0000 Subject: [PATCH 2/2] Revert #98150 (2/2) Epic: none Release note: None --- pkg/kv/kvserver/replica_proposal.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index 45565c2300e3..162d76b7ae32 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -418,12 +418,14 @@ func (r *Replica) leasePostApplyLocked( } } - // Potentially re-gossip if the range contains node liveness data. We need to - // perform this gossip at startup as soon as possible. We also need to perform - // this gossip if we're taking over after a leaseholder failure. In both cases, - // incremental liveness updates may have been lost, so we want to make sure that - // the latest view of node liveness ends up in gossip. - if leaseChangingHands && iAmTheLeaseHolder { + // Potentially re-gossip if the range contains system data (e.g. system + // config or node liveness). We need to perform this gossip at startup as + // soon as possible. Trying to minimize how often we gossip is a fool's + // errand. The node liveness info will be gossiped frequently (every few + // seconds) in any case due to the liveness heartbeats. And the system config + // will be gossiped rarely because it falls on a range with an epoch-based + // range lease that is only reacquired extremely infrequently. + if iAmTheLeaseHolder { // NB: run these in an async task to keep them out of the critical section // (r.mu is held here). _ = r.store.stopper.RunAsyncTask(r.AnnotateCtx(context.Background()), "lease-triggers", func(ctx context.Context) {