kvserver: delete TestRefreshPendingCommands

This test either has completely rotted or has always been confused. It
tries to verify that different mechanisms trigger Raft reproposals, but
it doesn't seem to actually depend on reproposals. The test says that an
increment depends on the reproposal of a previous lease request, but
there's no such lease request. Also, technically, reproposals of lease
requests stopped being a thing a while ago. It also talks about Raft
leadership changing, but that's not explicit in the test.

The test fails with the next commit, which changes how draining replicas
handle lease requests. It's unclear to me what's salvageable from the
test.

Release note: None
andreimatei committed Oct 26, 2020
1 parent be25a97 commit 480c9f0
Showing 1 changed file with 0 additions and 152 deletions.
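
For context on the mechanisms the commit message refers to: the refresh reasons
the deleted test exercises (new leader, snapshot applied, ticks) are different
triggers for the same action of re-proposing commands that were handed to Raft
but never applied. The minimal Go sketch below illustrates that idea; every
type and name in it is hypothetical and is not taken from this commit or from
the CockroachDB codebase.

// Hypothetical, simplified sketch -- not CockroachDB code.
package main

import "fmt"

// refreshReason names the trigger that caused a refresh, mirroring the idea
// behind the testing knobs in the deleted test.
type refreshReason string

const (
	reasonNewLeader       refreshReason = "new-leader"
	reasonSnapshotApplied refreshReason = "snapshot-applied"
	reasonTicks           refreshReason = "ticks"
)

type proposal struct{ id string }

type replica struct {
	pending []proposal // commands proposed to Raft but not yet applied
}

// refreshPendingCommands re-submits every pending proposal. A command that was
// proposed but never applied (for example because its log entry was truncated
// away, or because Raft leadership moved) gets another chance to commit.
func (r *replica) refreshPendingCommands(reason refreshReason) {
	for _, p := range r.pending {
		// A real system would hand the encoded command back to the Raft
		// proposal pipeline here; printing stands in for that step.
		fmt.Printf("reproposing %s (trigger: %s)\n", p.id, reason)
	}
}

func main() {
	r := &replica{pending: []proposal{{id: "increment-a"}}}
	r.refreshPendingCommands(reasonSnapshotApplied)
}
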
152 changes: 0 additions & 152 deletions pkg/kv/kvserver/client_raft_test.go
@@ -1234,158 +1234,6 @@ func TestReplicateAfterRemoveAndSplit(t *testing.T) {
	mtc.stores[2].SetReplicaGCQueueActive(true)
}

// Test various mechanisms for refreshing pending commands.
func TestRefreshPendingCommands(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer log.Scope(t).Close(t)

	// In this scenario, three different mechanisms detect the need to repropose
	// commands. Test that each one is sufficient individually. We have this
	// redundancy because some mechanisms respond with lower latency than others,
	// but each has some scenarios (not currently tested) in which it is
	// insufficient on its own. In addition, there is a fourth reproposal
	// mechanism (reasonNewLeaderOrConfigChange) which is not relevant to this
	// scenario.
	//
	// We don't test with only reasonNewLeader because that mechanism is less
	// robust than refreshing due to snapshot or ticks. In particular, it is
	// possible for node 3 to propose the RequestLease command and have that
	// command executed by the other nodes but to never see the execution locally
	// because it is caught up by applying a snapshot.
	testCases := map[string]kvserver.StoreTestingKnobs{
		"reasonSnapshotApplied": {
			DisableRefreshReasonNewLeader: true,
			DisableRefreshReasonTicks:     true,
		},
		"reasonTicks": {
			DisableRefreshReasonNewLeader:       true,
			DisableRefreshReasonSnapshotApplied: true,
		},
	}
	for name, c := range testCases {
		t.Run(name, func(t *testing.T) {
			sc := kvserver.TestStoreConfig(nil)
			sc.TestingKnobs = c
			// Disable periodic gossip tasks which can move the range 1 lease
			// unexpectedly.
			sc.TestingKnobs.DisablePeriodicGossips = true
			sc.Clock = nil // manual clock
			mtc := &multiTestContext{
				storeConfig: &sc,
				// This test was written before the multiTestContext started creating
				// many system ranges at startup, and hasn't been updated to take that
				// into account.
				startWithSingleRange: true,
			}
			defer mtc.Stop()
			mtc.Start(t, 3)

			const rangeID = roachpb.RangeID(1)
			mtc.replicateRange(rangeID, 1, 2)

			// Put some data in the range so we'll have something to test for.
			incArgs := incrementArgs([]byte("a"), 5)
			if _, err := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), incArgs); err != nil {
				t.Fatal(err)
			}

			// Wait for all nodes to catch up.
			mtc.waitForValues(roachpb.Key("a"), []int64{5, 5, 5})

			// Stop node 2; while it is down write some more data.
			mtc.stopStore(2)

			if _, err := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), incArgs); err != nil {
				t.Fatal(err)
			}

			// Get the last increment's log index.
			repl, err := mtc.stores[0].GetReplica(1)
			if err != nil {
				t.Fatal(err)
			}
			index, err := repl.GetLastIndex()
			if err != nil {
				t.Fatal(err)
			}

			// Truncate the log at index+1 (log entries < N are removed, so this includes
			// the increment).
			truncArgs := truncateLogArgs(index+1, rangeID)
			if _, err := kv.SendWrapped(context.Background(), mtc.stores[0].TestSender(), truncArgs); err != nil {
				t.Fatal(err)
			}

			// Stop and restart node 0 in order to make sure that any in-flight Raft
			// messages have been sent.
			mtc.stopStore(0)
			mtc.restartStore(0)

			////////////////////////////////////////////////////////////////////
			// We want store 2 to take the lease later, so we'll drain the other
			// stores and expire the lease.
			////////////////////////////////////////////////////////////////////

			// Disable node liveness heartbeats which can reacquire leases when we're
			// trying to expire them. We pause liveness heartbeats here after node 0
			// was restarted (which creates a new NodeLiveness).
			pauseNodeLivenessHeartbeatLoops(mtc)

			// Start draining stores 0 and 1 to prevent them from grabbing any new
			// leases.
			mtc.advanceClock(context.Background())
			var wg sync.WaitGroup
			for i := 0; i < 2; i++ {
				wg.Add(1)
				go func(i int) {
					mtc.stores[i].SetDraining(true, nil /* reporter */)
					wg.Done()
				}(i)
			}

			// Wait for the stores 0 and 1 to have entered draining mode, and then
			// advance the clock. Advancing the clock will leave the liveness records
			// of draining nodes in an expired state, so the SetDraining() call above
			// will be able to terminate.
			draining := false
			for !draining {
				draining = true
				for i := 0; i < 2; i++ {
					draining = draining && mtc.stores[i].IsDraining()
				}
				// Allow this loop to be preempted. Failure to do so can cause a
				// deadlock because a non-preemptible loop will prevent GC from
				// starting which in turn will cause all other goroutines to be stuck
				// as soon as they are called on to assist the GC (this shows up as
				// goroutines stuck in "GC assist wait"). With all of the other
				// goroutines stuck, nothing will be able to set mtc.stores[i].draining
				// to true.
				//
				// See #18554.
				runtime.Gosched()
			}
			mtc.advanceClock(context.Background())

			wg.Wait()

			// Restart node 2 and wait for the snapshot to be applied. Note that
			// waitForValues reads directly from the engine and thus isn't executing
			// a Raft command.
			mtc.restartStore(2)
			mtc.waitForValues(roachpb.Key("a"), []int64{10, 10, 10})

			// Send an increment to the restarted node. If we don't refresh pending
			// commands appropriately, the range lease command will not get
			// re-proposed when we discover the new leader.
			if _, err := kv.SendWrapped(context.Background(), mtc.stores[2].TestSender(), incArgs); err != nil {
				t.Fatal(err)
			}

			mtc.waitForValues(roachpb.Key("a"), []int64{15, 15, 15})
		})
	}
}

// Test that when a Raft group is not able to establish a quorum, its Raft log
// does not grow without bound. It tests two different scenarios where this used
// to be possible (see #27772):