From 87b463a09f714af53270f4307d38a01fc76cebf6 Mon Sep 17 00:00:00 2001 From: Andrew Baptist Date: Mon, 9 Dec 2024 10:14:02 -0500 Subject: [PATCH] roachtest: increase the token return time with disk bandwidth limit Previously the perturbation/* tests would wait 10m for tokens to be returned. Without the disk bandwidth limit set, they typically return almost immediately but with a limit they can take ~30m to return in some cases even after the workload is stopped and the system is idle. This change fixes some of the perturbation/metamorphic/* tests that are hitting this slow token return. Additionally this change reduces the token wait time for the test admission-control/elastic-workload/mixed-version to 1 minute as this test doesn't typically wait more then a few seconds for token return. Epic: none Fixes: #136982 Fixes: #136553 Informs: #137017 Release note: None --- pkg/cmd/roachtest/roachtestutil/validation_check.go | 10 +++++++--- .../tests/admission_control_elastic_mixed_version.go | 2 +- pkg/cmd/roachtest/tests/perturbation/framework.go | 9 ++++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pkg/cmd/roachtest/roachtestutil/validation_check.go b/pkg/cmd/roachtest/roachtestutil/validation_check.go index d2433f6c0eff..cb24489fb21a 100644 --- a/pkg/cmd/roachtest/roachtestutil/validation_check.go +++ b/pkg/cmd/roachtest/roachtestutil/validation_check.go @@ -128,7 +128,11 @@ func CheckInvalidDescriptors(ctx context.Context, db *gosql.DB) error { // validateTokensReturned ensures that all RACv2 tokens are returned to the pool // at the end of the test. func ValidateTokensReturned( - ctx context.Context, t test.Test, c cluster.Cluster, nodes option.NodeListOption, + ctx context.Context, + t test.Test, + c cluster.Cluster, + nodes option.NodeListOption, + waitTime time.Duration, ) { t.L().Printf("validating all tokens returned") for _, node := range nodes { @@ -163,10 +167,10 @@ func ValidateTokensReturned( } } return nil - // We wait up to 10 minutes for the tokens to be returned. In tests which + // We wait up to waitTime for the tokens to be returned. In tests which // purposefully create a send queue towards a node, the queue may take a // while to drain. The tokens will not be returned until the queue is // empty and there are no inflight requests. - }, 10*time.Minute) + }, waitTime) } } diff --git a/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go b/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go index 1dec1fa84742..26e0089a2f9b 100644 --- a/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go +++ b/pkg/cmd/roachtest/tests/admission_control_elastic_mixed_version.go @@ -133,7 +133,7 @@ func registerElasticWorkloadMixedVersion(r registry.Registry) { mvt.Run() // TODO(pav-kv): also validate that the write throughput was kept under // control, and the foreground traffic was not starved. - roachtestutil.ValidateTokensReturned(ctx, t, c, c.CRDBNodes()) + roachtestutil.ValidateTokensReturned(ctx, t, c, c.CRDBNodes(), time.Minute) }, }) } diff --git a/pkg/cmd/roachtest/tests/perturbation/framework.go b/pkg/cmd/roachtest/tests/perturbation/framework.go index e1c5023eab20..410c7e8b67f7 100644 --- a/pkg/cmd/roachtest/tests/perturbation/framework.go +++ b/pkg/cmd/roachtest/tests/perturbation/framework.go @@ -669,7 +669,14 @@ func (v variations) runTest(ctx context.Context, t test.Test, c cluster.Cluster) t.L().Printf("validating stats after the perturbation") failures = append(failures, isAcceptableChange(t.L(), baselineStats, afterStats, v.acceptableChange)...) require.True(t, len(failures) == 0, strings.Join(failures, "\n")) - roachtestutil.ValidateTokensReturned(ctx, t, v, v.stableNodes()) + // TODO(baptist): Look at the time for token return in actual tests to + // determine if this can be lowered further. + tokenReturnTime := 10 * time.Minute + // TODO(#137017): Increase the return time if disk bandwidth limit is set. + if v.diskBandwidthLimit != "0" { + tokenReturnTime = 1 * time.Hour + } + roachtestutil.ValidateTokensReturned(ctx, t, v, v.stableNodes(), tokenReturnTime) } func (v variations) applyClusterSettings(ctx context.Context, t test.Test) {