Skip to content

Commit

Permalink
Merge #112123
Browse files Browse the repository at this point in the history
112123: roachtest: initialize system before restricting lease transfers r=andrewbaptist a=andrewbaptist

The test would previously set the parameter
`COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER` prior to initializing the
kv workload wth 1K splits. This would occasionally fail due to
rebalancing of ranges post-split. This commit allows all the splits and
rebalances to happen prior to setting this flat.

Epic: none
Fixes: #111068
Fixes: #110842

Release note: None

Co-authored-by: Andrew Baptist <[email protected]>
  • Loading branch information
craig[bot] and andrewbaptist committed Oct 11, 2023
2 parents 0417071 + d22da89 commit 69bd71a
Showing 1 changed file with 10 additions and 9 deletions.
19 changes: 10 additions & 9 deletions pkg/cmd/roachtest/tests/failover.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,6 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C
opts := option.DefaultStartOpts()
opts.RoachprodOpts.ScheduleBackups = false
settings := install.MakeClusterSettings()
settings.Env = append(settings.Env, "COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER=true")
settings.Env = append(settings.Env, "COCKROACH_SCAN_MAX_IDLE_TIME=100ms") // speed up replication
// TODO(erikgrinaker): temporary workaround for
// https://github.com/cockroachdb/cockroach/issues/105274
settings.ClusterSettings["server.span_stats.span_batch_limit"] = "4096"
Expand All @@ -558,18 +556,12 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C
// initial RF of 5, so pass in exactlyReplicationFactor below.
require.NoError(t, WaitForReplication(ctx, t, conn, 3, exactlyReplicationFactor))

// Disable the replicate queue. It can otherwise end up with stuck
// overreplicated ranges during rebalancing, because downreplication requires
// the Raft leader to be colocated with the leaseholder.
_, err := conn.ExecContext(ctx, `SET CLUSTER SETTING kv.replicate_queue.enabled = false`)
require.NoError(t, err)

// Now that system ranges are properly placed on n1-n3, start n4-n6.
c.Start(ctx, t.L(), opts, settings, c.Range(4, 6))

// Create the kv database on n4-n6.
t.L().Printf("creating workload database")
_, err = conn.ExecContext(ctx, `CREATE DATABASE kv`)
_, err := conn.ExecContext(ctx, `CREATE DATABASE kv`)
require.NoError(t, err)
configureZone(t, ctx, conn, `DATABASE kv`, zoneConfig{replicas: 3, onlyNodes: []int{4, 5, 6}})

Expand All @@ -581,6 +573,15 @@ func runFailoverPartialLeaseLeader(ctx context.Context, t test.Test, c cluster.C
relocateRanges(t, ctx, conn, `database_name = 'kv'`, []int{1, 2, 3}, []int{4, 5, 6})
relocateRanges(t, ctx, conn, `database_name != 'kv'`, []int{4, 5, 6}, []int{1, 2, 3})

settings.Env = append(settings.Env, "COCKROACH_DISABLE_LEADER_FOLLOWS_LEASEHOLDER=true")

// Restart the nodes with the updated cluster setting.
c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Range(1, 6))
c.Start(ctx, t.L(), opts, settings, c.Range(1, 6))

// Wait for the TimeAfterNodeSuspect interval.
sleepFor(ctx, t, 30*time.Second)

// Check that we have a few split leaders/leaseholders on n4-n6. We give
// it a few seconds, since metrics are updated every 10 seconds.
for i := 0; ; i++ {
Expand Down

0 comments on commit 69bd71a

Please sign in to comment.