Skip to content

Commit

Permalink
Merge #103190
Browse files Browse the repository at this point in the history
103190: roachtest: enable metamorphic expiration leases in most tests r=erikgrinaker a=erikgrinaker

**roachprod: add option for initial cluster settings**

This patch adds a roachprod option `ClusterSettingsOption` that can be used to pass arbitrary initial cluster settings during cluster start.

It also no longer sets `server.remote_debugging.mode = 'any'`, since this setting was removed in 21.2.

  
**roachtest: add option for metamorphic expiration leases**

To increase test coverage of expiration-based leases, this patch adds an opt-in test parameter `Leases` that allows tests to enable them either metamorphically or explicitly.

**roachtest: add TPCC benchmarks with expiration leases**

This patch adds TPCC benchmark variants with expiration leases for a few representative configurations.

**roachtest: enable metamorphic expiration leases in most tests**

This patch uses metamorphic expiration leases in most non-benchmark roachtests. There will likely be some fallout from this, in particular for timing/latency-sensitive tests, but we can deal with that as it happens.

Resolves #103188.
Epic: none
Release note: None

Co-authored-by: Erik Grinaker <[email protected]>
  • Loading branch information
craig[bot] and erikgrinaker committed May 14, 2023
2 parents 63eb6b8 + fc68b0f commit a65bca2
Show file tree
Hide file tree
Showing 93 changed files with 303 additions and 23 deletions.
5 changes: 5 additions & 0 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,9 @@ type clusterImpl struct {
expiration time.Time
encAtRest bool // use encryption at rest

// clusterSettings are additional cluster settings set on cluster startup.
clusterSettings map[string]string

// destroyState contains state related to the cluster's destruction.
destroyState destroyState
}
Expand Down Expand Up @@ -1887,6 +1890,8 @@ func (c *clusterImpl) StartE(
install.EnvOption(settings.Env),
install.NumRacksOption(settings.NumRacks),
install.BinaryOption(settings.Binary),
install.ClusterSettingsOption(c.clusterSettings),
install.ClusterSettingsOption(settings.ClusterSettings),
}

if err := roachprod.Start(ctx, l, c.MakeNodes(opts...), startOpts.RoachprodOpts, clusterSettingsOpts...); err != nil {
Expand Down
33 changes: 33 additions & 0 deletions pkg/cmd/roachtest/registry/test_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package registry

import (
"context"
"fmt"
"regexp"
"strings"
"time"
Expand Down Expand Up @@ -85,6 +86,10 @@ type TestSpec struct {
// cannot be run with encryption enabled.
EncryptionSupport EncryptionSupport

// Leases specifies the kind of leases to use for the cluster. Defaults
// to DefaultLeases, which leaves the cluster's default lease type in place.
Leases LeaseType

// SkipPostValidations is a bit-set of post-validations that should be skipped
// after the test completes. This is useful for tests that are known to be
// incompatible with some validations. By default, tests will run all
Expand Down Expand Up @@ -181,3 +186,31 @@ func Tags(values ...string) map[string]struct{} {
}
return set
}

// LeaseType specifies the type of leases to use for the cluster. The zero
// value is DefaultLeases.
type LeaseType int

// String returns a short, human-readable name for the lease type. Values
// that do not correspond to a declared constant are rendered as
// "leasetype-<n>" so that they remain identifiable in logs and error
// messages.
func (l LeaseType) String() string {
	switch l {
	case DefaultLeases:
		// The zero value must have a real name; otherwise a spec that never
		// sets the lease type would print as the unknown-value fallback.
		return "default"
	case EpochLeases:
		return "epoch"
	case ExpirationLeases:
		return "expiration"
	case MetamorphicLeases:
		return "metamorphic"
	default:
		return fmt.Sprintf("leasetype-%d", int(l))
	}
}

const (
	// DefaultLeases uses the default cluster lease type.
	DefaultLeases = LeaseType(iota)
	// EpochLeases uses epoch leases where possible.
	EpochLeases
	// ExpirationLeases uses expiration leases for all ranges.
	ExpirationLeases
	// MetamorphicLeases randomly chooses epoch or expiration
	// leases (across the entire cluster).
	MetamorphicLeases
)
21 changes: 20 additions & 1 deletion pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,8 @@ func (r *testRunner) runWorker(
// test".
c.status("running test")

switch t.Spec().(*registry.TestSpec).EncryptionSupport {
testSpec := t.Spec().(*registry.TestSpec)
switch testSpec.EncryptionSupport {
case registry.EncryptionAlwaysEnabled:
c.encAtRest = true
case registry.EncryptionAlwaysDisabled:
Expand All @@ -707,6 +708,24 @@ func (r *testRunner) runWorker(
c.encAtRest = prng.Float64() < encryptionProbability
}

// Set initial cluster settings for this test.
c.clusterSettings = map[string]string{}

switch testSpec.Leases {
case registry.DefaultLeases:
case registry.EpochLeases:
c.clusterSettings["kv.expiration_leases_only.enabled"] = "false"
case registry.ExpirationLeases:
c.clusterSettings["kv.expiration_leases_only.enabled"] = "true"
case registry.MetamorphicLeases:
enabled := prng.Float64() < 0.5
c.status(fmt.Sprintf("metamorphically setting kv.expiration_leases_only.enabled = %t",
enabled))
c.clusterSettings["kv.expiration_leases_only.enabled"] = fmt.Sprintf("%t", enabled)
default:
t.Fatalf("unknown lease type %s", testSpec.Leases)
}

wStatus.SetCluster(c)
wStatus.SetTest(t, testToRun)
wStatus.SetStatus("running test")
Expand Down
11 changes: 8 additions & 3 deletions pkg/cmd/roachtest/tests/acceptance.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func registerAcceptance(r registry.Registry) {
numNodes int
timeout time.Duration
encryptionSupport registry.EncryptionSupport
defaultLeases bool
}{
registry.OwnerKV: {
{name: "decommission-self", fn: runDecommissionSelf},
Expand Down Expand Up @@ -63,9 +64,10 @@ func registerAcceptance(r registry.Registry) {
},
registry.OwnerTestEng: {
{
name: "version-upgrade",
fn: runVersionUpgrade,
timeout: 30 * time.Minute,
name: "version-upgrade",
fn: runVersionUpgrade,
timeout: 30 * time.Minute,
defaultLeases: true,
},
},
registry.OwnerDisasterRecovery: {
Expand Down Expand Up @@ -105,6 +107,9 @@ func registerAcceptance(r registry.Registry) {
spec.Timeout = tc.timeout
}
spec.EncryptionSupport = tc.encryptionSupport
if !tc.defaultLeases {
spec.Leases = registry.MetamorphicLeases
}
spec.Run = func(ctx context.Context, t test.Test, c cluster.Cluster) {
tc.fn(ctx, t, c)
}
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/activerecord.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,5 +249,6 @@ func registerActiveRecord(r registry.Registry) {
NativeLibs: registry.LibGEOS,
Tags: registry.Tags(`default`, `orm`),
Run: runActiveRecord,
Leases: registry.MetamorphicLeases,
})
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func registerElasticControlForBackups(r registry.Registry) {
Owner: registry.OwnerAdmissionControl,
Tags: registry.Tags(`weekly`),
Cluster: r.MakeClusterSpec(4, spec.CPU(8)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.Spec().NodeCount < 4 {
t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount)
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/admission_control_elastic_cdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func registerElasticControlForCDC(r registry.Registry) {
Tags: registry.Tags(`weekly`),
Cluster: r.MakeClusterSpec(4, spec.CPU(8)),
RequiresLicense: true,
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.Spec().NodeCount < 4 {
t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ func registerIndexOverload(r registry.Registry) {
Owner: registry.OwnerAdmissionControl,
Tags: registry.Tags("weekly"),
Cluster: r.MakeClusterSpec(4, spec.CPU(8)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
crdbNodes := c.Spec().NodeCount - 1
workloadNode := c.Spec().NodeCount
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func registerMultiStoreOverload(r registry.Registry) {
Owner: registry.OwnerAdmissionControl,
Tags: registry.Tags(`weekly`),
Cluster: r.MakeClusterSpec(2, spec.CPU(8), spec.SSD(2)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runKV(ctx, t, c)
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ func registerMultiTenantFairness(r registry.Registry) {
Name: fmt.Sprintf("admission-control/multitenant-fairness/%s", s.name),
Cluster: r.MakeClusterSpec(5),
Owner: registry.OwnerAdmissionControl,
Leases: registry.MetamorphicLeases,
NonReleaseBlocker: false,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runMultiTenantFairness(ctx, t, c, s)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func registerSnapshotOverload(r registry.Registry) {
Owner: registry.OwnerAdmissionControl,
Tags: registry.Tags(`weekly`),
Cluster: r.MakeClusterSpec(4, spec.CPU(8)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.Spec().NodeCount < 4 {
t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount)
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/admission_control_tpcc_overload.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ func registerTPCCOverload(r registry.Registry) {
Cluster: r.MakeClusterSpec(s.Nodes+1, spec.CPU(s.CPUs)),
Run: s.run,
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Timeout: 20 * time.Minute,
})
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/cmd/roachtest/tests/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ func registerAllocator(r registry.Registry) {
Name: `replicate/up/1to3`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(4),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 1, 10.0)
},
Expand All @@ -159,6 +160,7 @@ func registerAllocator(r registry.Registry) {
Name: `replicate/rebalance/3to5`,
Owner: registry.OwnerKV,
Cluster: r.MakeClusterSpec(6),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAllocator(ctx, t, c, 3, 42.0)
},
Expand All @@ -168,6 +170,7 @@ func registerAllocator(r registry.Registry) {
Owner: registry.OwnerKV,
Timeout: 10 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Leases: registry.MetamorphicLeases,
Run: runWideReplication,
})
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/cmd/roachtest/tests/alterpk.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ func registerAlterPK(r registry.Registry) {
// Use a 4 node cluster -- 3 nodes will run cockroach, and the last will be the
// workload driver node.
Cluster: r.MakeClusterSpec(4),
Leases: registry.MetamorphicLeases,
Run: runAlterPKBank,
})
r.Add(registry.TestSpec{
Expand All @@ -191,6 +192,7 @@ func registerAlterPK(r registry.Registry) {
// Use a 4 node cluster -- 3 nodes will run cockroach, and the last will be the
// workload driver node.
Cluster: r.MakeClusterSpec(4, spec.CPU(32)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAlterPKTPCC(ctx, t, c, 250 /* warehouses */, true /* expensiveChecks */)
},
Expand All @@ -201,6 +203,7 @@ func registerAlterPK(r registry.Registry) {
// Use a 4 node cluster -- 3 nodes will run cockroach, and the last will be the
// workload driver node.
Cluster: r.MakeClusterSpec(4, spec.CPU(16)),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAlterPKTPCC(ctx, t, c, 500 /* warehouses */, false /* expensiveChecks */)
},
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/asyncpg.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ func registerAsyncpg(r registry.Registry) {
Owner: registry.OwnerSQLSessions,
Cluster: r.MakeClusterSpec(1, spec.CPU(16)),
Tags: registry.Tags(`default`, `orm`),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runAsyncpg(ctx, t, c)
},
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/awsdms.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ func registerAWSDMS(r registry.Registry) {
Name: "awsdms",
Owner: registry.OwnerSQLSessions, // TODO(otan): add a migrations OWNERS team
Cluster: r.MakeClusterSpec(1),
Leases: registry.MetamorphicLeases,
Tags: registry.Tags(`default`, `awsdms`, `aws`),
Run: runAWSDMS,
})
Expand Down
6 changes: 6 additions & 0 deletions pkg/cmd/roachtest/tests/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ func registerBackupNodeShutdown(r registry.Registry) {
Owner: registry.OwnerDisasterRecovery,
Cluster: backupNodeRestartSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
gatewayNode := 2
nodeToShutdown := 3
Expand All @@ -237,6 +238,7 @@ func registerBackupNodeShutdown(r registry.Registry) {
Owner: registry.OwnerDisasterRecovery,
Cluster: backupNodeRestartSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
gatewayNode := 2
nodeToShutdown := 2
Expand Down Expand Up @@ -432,6 +434,7 @@ func registerBackup(r registry.Registry) {
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(3),
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.Spec().Cloud != item.machine {
t.Skip("backup assumeRole is only configured to run on "+item.machine, "")
Expand Down Expand Up @@ -538,6 +541,7 @@ func registerBackup(r registry.Registry) {
Owner: registry.OwnerDisasterRecovery,
Cluster: KMSSpec,
EncryptionSupport: registry.EncryptionMetamorphic,
Leases: registry.MetamorphicLeases,
Tags: item.tags,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
if c.Spec().Cloud != item.machine {
Expand Down Expand Up @@ -671,6 +675,7 @@ func registerBackup(r registry.Registry) {
Name: `backupTPCC`,
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(3),
Leases: registry.MetamorphicLeases,
Timeout: 1 * time.Hour,
EncryptionSupport: registry.EncryptionMetamorphic,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
Expand Down Expand Up @@ -877,6 +882,7 @@ func registerBackup(r registry.Registry) {
Owner: registry.OwnerDisasterRecovery,
Timeout: 4 * time.Hour,
Cluster: r.MakeClusterSpec(3, spec.CPU(8)),
Leases: registry.MetamorphicLeases,
EncryptionSupport: registry.EncryptionMetamorphic,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runBackupMVCCRangeTombstones(ctx, t, c, mvccRangeTombstoneConfig{})
Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/tests/cancel.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func registerCancel(r registry.Registry) {
Name: fmt.Sprintf("cancel/tpch/distsql/queries=%s,nodes=%d", queries, numNodes),
Owner: registry.OwnerSQLQueries,
Cluster: r.MakeClusterSpec(numNodes),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runCancel(ctx, t, c, tpchQueriesToRun, true /* useDistsql */)
},
Expand All @@ -148,6 +149,7 @@ func registerCancel(r registry.Registry) {
Name: fmt.Sprintf("cancel/tpch/local/queries=%s,nodes=%d", queries, numNodes),
Owner: registry.OwnerSQLQueries,
Cluster: r.MakeClusterSpec(numNodes),
Leases: registry.MetamorphicLeases,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runCancel(ctx, t, c, tpchQueriesToRun, false /* useDistsql */)
},
Expand Down
Loading

0 comments on commit a65bca2

Please sign in to comment.