Skip to content

Commit

Permalink
kvserver: deflake TestLeasePreferencesDuringOutage
Browse files Browse the repository at this point in the history
Test didn't accomodate for lease type changes on the same
leaseholder in its assertions.

Release note: None
  • Loading branch information
aliher1911 committed May 30, 2023
1 parent a65bca2 commit 9da622d
Showing 1 changed file with 29 additions and 8 deletions.
37 changes: 29 additions & 8 deletions pkg/kv/kvserver/client_lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"math"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"testing"
Expand Down Expand Up @@ -991,8 +992,8 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
tc.StopServer(1)
tc.StopServer(2)

wait := func(duration int64) {
manualClock.Increment(duration)
wait := func(duration time.Duration) {
manualClock.Increment(duration.Nanoseconds())
// Gossip and heartbeat all the live stores, we do this manually otherwise the
// allocator on server 0 may see everyone as temporarily dead due to the
// clock move above.
Expand All @@ -1003,19 +1004,19 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
}
// We need to wait until 2 and 3 are considered to be dead.
timeUntilStoreDead := storepool.TimeUntilStoreDead.Get(&tc.GetFirstStoreFromServer(t, 0).GetStoreConfig().Settings.SV)
wait(timeUntilStoreDead.Nanoseconds())
wait(timeUntilStoreDead)

checkDead := func(store *kvserver.Store, storeIdx int) error {
if dead, timetoDie, err := store.GetStoreConfig().StorePool.IsDead(
tc.GetFirstStoreFromServer(t, storeIdx).StoreID()); err != nil || !dead {
// Sometimes a gossip update arrives right after server shutdown and
// after we manually moved the time, so move it again.
if err == nil {
wait(timetoDie.Nanoseconds())
wait(timetoDie)
}
// NB: errors.Wrapf(nil, ...) returns nil.
// nolint:errwrap
return errors.Errorf("expected server 2 to be dead, instead err=%v, dead=%v", err, dead)
return errors.Errorf("expected server %d to be dead, instead err=%v, dead=%v", storeIdx, err, dead)
}
return nil
}
Expand All @@ -1038,7 +1039,7 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
_, _, enqueueError := tc.GetFirstStoreFromServer(t, 0).
Enqueue(ctx, "replicate", repl, true /* skipShouldQueue */, false /* async */)

require.NoError(t, enqueueError)
require.NoError(t, enqueueError, "failed to enqueue replica for replication")

var newLeaseHolder roachpb.ReplicationTarget
testutils.SucceedsSoon(t, func() error {
Expand All @@ -1062,8 +1063,28 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
require.NotEqual(t, "sf", dc)
}
history := repl.GetLeaseHistory()
// make sure we see the eu node as a lease holder in the second to last position.
require.Equal(t, tc.Target(0).NodeID, history[len(history)-2].Replica.NodeID)
// Make sure we see the eu node as a lease holder in the second to last
// leaseholder change.
// Since we can have expiration and epoch based leases at the tail of the
// history, we need to ignore them together if they originate from the same
// leaseholder.
nextNodeID := history[len(history)-1].Replica.NodeID
lastMove := len(history) - 2
for ; lastMove >= 0; lastMove-- {
if history[lastMove].Replica.NodeID != nextNodeID {
break
}
}
lastMove++
var leasesMsg []string
for _, h := range history {
leasesMsg = append(leasesMsg, h.String())
}
leaseHistory := strings.Join(leasesMsg, ", ")
require.Greater(t, lastMove, 0,
"must have at least one leaseholder change in history (lease history: %s)", leaseHistory)
require.Equal(t, tc.Target(0).NodeID, history[lastMove-1].Replica.NodeID,
"node id prior to last lease move (lease history: %s)", leaseHistory)
}

// This test verifies that when a node starts flapping its liveness, all leases
Expand Down

0 comments on commit 9da622d

Please sign in to comment.