From a45c9c0b6515198102dbea9b2e5a9278b86ffc3b Mon Sep 17 00:00:00 2001 From: Ibrahim Kettaneh Date: Mon, 19 Aug 2024 10:56:04 -0400 Subject: [PATCH] kvserver: deflake TestRangefeedCheckpointsRecoverFromLeaseExpiration This commit does the following to deflake the test: wait for N1's view of N2's lease expiration to match N2's view. This is important in the rare case where N1 tries to increase N2's epoch, but it has a stale view of the lease expiration time. Fixes: #124178, #123551 Epic: None Release note: None --- pkg/kv/kvserver/replica_rangefeed_test.go | 24 ++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pkg/kv/kvserver/replica_rangefeed_test.go b/pkg/kv/kvserver/replica_rangefeed_test.go index bdd22633f1a1..0dcab22fbe70 100644 --- a/pkg/kv/kvserver/replica_rangefeed_test.go +++ b/pkg/kv/kvserver/replica_rangefeed_test.go @@ -1367,8 +1367,6 @@ func TestRangefeedCheckpointsRecoverFromLeaseExpiration(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - skip.WithIssue(t, 123551) - ctx := context.Background() var scratchRangeID int64 // accessed atomically // nudgeSeen will be set if a request filter sees the signature of the @@ -1509,13 +1507,25 @@ func TestRangefeedCheckpointsRecoverFromLeaseExpiration(t *testing.T) { // Expire the lease. Given that the Raft leadership is on n2, only n2 will be // eligible to acquire a new lease. log.Infof(ctx, "test expiring lease") - nl := n2.NodeLiveness().(*liveness.NodeLiveness) - resumeHeartbeats := nl.PauseAllHeartbeatsForTest() - n2Liveness, ok := nl.Self() + nl2 := n2.NodeLiveness().(*liveness.NodeLiveness) + resumeHeartbeats := nl2.PauseAllHeartbeatsForTest() + n2Liveness, ok := nl2.Self() require.True(t, ok) - manualClock.Increment(n2Liveness.Expiration.ToTimestamp().Add(1, 0).WallTime - manualClock.UnixNano()) + manualClock.Increment(n2Liveness.Expiration.ToTimestamp(). 
+ Add(1, 0).WallTime - manualClock.UnixNano()) atomic.StoreInt64(&rejectExtraneousRequests, 1) - // Ask another node to increment n2's liveness record. + + // Ask another node to increment n2's liveness record, but first, wait until + // n1's view of n2's liveness record matches n2's own. Otherwise, the epoch + // increment below might get rejected because of mismatching liveness records. + testutils.SucceedsSoon(t, func() error { + nl1 := n1.NodeLiveness().(*liveness.NodeLiveness) + n2LivenessFromN1, _ := nl1.GetLiveness(n2.NodeID()) + if n2Liveness != n2LivenessFromN1.Liveness { + return errors.Errorf("waiting for node 2 liveness to converge on both nodes 1 and 2") + } + return nil + }) require.NoError(t, n1.NodeLiveness().(*liveness.NodeLiveness).IncrementEpoch(ctx, n2Liveness)) resumeHeartbeats()