Skip to content

Commit

Permalink
kvserver: disable allocator checks with `COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER`
Browse files Browse the repository at this point in the history

Otherwise, the replicate queue may be unable to relocate leaseholder
replicas. This can cause e.g. `failover/partial/lease-leader` to flake.

Epic: none
Release note: None
  • Loading branch information
erikgrinaker committed Oct 24, 2023
1 parent 736174e commit 56fb5b1
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 9 deletions.
9 changes: 2 additions & 7 deletions pkg/cmd/roachtest/tests/failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,22 +545,17 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C

// Place all ranges on n1-n3 to start with, and wait for upreplication.
configureAllZones(t, ctx, conn, zoneConfig{replicas: 3, onlyNodes: []int{1, 2, 3}})

// NB: We want to ensure the system ranges are all down-replicated from their
// initial RF of 5, so pass in exactlyReplicationFactor below.
require.NoError(t, WaitForReplication(ctx, t, conn, 3, exactlyReplicationFactor))

// Disable the replicate queue. It can otherwise end up with stuck
// overreplicated ranges during rebalancing, because downreplication requires
// the Raft leader to be colocated with the leaseholder.
_, err := conn.ExecContext(ctx, `SET CLUSTER SETTING kv.replicate_queue.enabled = false`)
require.NoError(t, err)

// Now that system ranges are properly placed on n1-n3, start n4-n6.
c.Start(ctx, t.L(), opts, settings, c.Range(4, 6))

// Create the kv database on n4-n6.
t.L().Printf("creating workload database")
_, err = conn.ExecContext(ctx, `CREATE DATABASE kv`)
_, err := conn.ExecContext(ctx, `CREATE DATABASE kv`)
require.NoError(t, err)
configureZone(t, ctx, conn, `DATABASE kv`, zoneConfig{replicas: 3, onlyNodes: []int{4, 5, 6}})

Expand Down
6 changes: 4 additions & 2 deletions pkg/kv/kvserver/allocator/allocatorimpl/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2183,8 +2183,10 @@ func (a *Allocator) leaseholderShouldMoveDueToPreferences(
// If there are any replicas that do match lease preferences, then we check if
// the existing leaseholder is one of them.
preferred := a.PreferredLeaseholders(storePool, conf, candidates)
preferred = excludeReplicasInNeedOfSnapshots(
ctx, leaseRepl.RaftStatus(), leaseRepl.GetFirstIndex(), preferred)
if a.knobs == nil || !a.knobs.AllowLeaseTransfersToReplicasNeedingSnapshots {
preferred = excludeReplicasInNeedOfSnapshots(
ctx, leaseRepl.RaftStatus(), leaseRepl.GetFirstIndex(), preferred)
}
if len(preferred) == 0 {
return false
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,12 @@ func (sc *StoreConfig) SetDefaults(numStores int) {
if raftDisableLeaderFollowsLeaseholder {
sc.TestingKnobs.DisableLeaderFollowsLeaseholder = true
sc.TestingKnobs.AllowLeaseRequestProposalsWhenNotLeader = true // otherwise lease requests fail
// The allocator must skip snapshot checks, since these only work when the
// leader and leaseholder are colocated.
if sc.TestingKnobs.AllocatorKnobs == nil {
sc.TestingKnobs.AllocatorKnobs = &allocator.TestingKnobs{}
}
sc.TestingKnobs.AllocatorKnobs.AllowLeaseTransfersToReplicasNeedingSnapshots = true
}
if raftDisableQuiescence {
sc.TestingKnobs.DisableQuiescence = true
Expand Down

0 comments on commit 56fb5b1

Please sign in to comment.