From 69b1dcfe55e5a709b782e3209405f251b668d357 Mon Sep 17 00:00:00 2001 From: Mira Radeva Date: Tue, 10 Dec 2024 09:12:18 -0500 Subject: [PATCH 1/2] kvnemesis: increase the retry backoff for aborted txns In https://github.com/cockroachdb/cockroach/pull/135173, we tried introducing a backoff in the transaction retry mechanism (in `db.Txn`) in order to help with large transactions that repeatedly get into a deadlock (as seen in #133431). This wasn't enough, as we continue to see this type of failure in #136266. This commit increases the kvnemesis-specific retry backoff, which is meant to prevent thrashing of aborted transactions. We do see transactions getting aborted as part of deadlock resolution in both of the failures mentioned above, so hopefully this will help. Both failures above are rare and hard to repro. Fixes: #136266 Release note: None --- pkg/kv/kvnemesis/applier.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/kv/kvnemesis/applier.go b/pkg/kv/kvnemesis/applier.go index 1d0a84dc5e2d..77daba35dab0 100644 --- a/pkg/kv/kvnemesis/applier.go +++ b/pkg/kv/kvnemesis/applier.go @@ -145,7 +145,7 @@ func applyOp(ctx context.Context, env *Env, db *kv.DB, op *Operation) { // epochs of the same transaction to avoid waiting while holding locks. retryOnAbort := retry.StartWithCtx(ctx, retry.Options{ InitialBackoff: 1 * time.Millisecond, - MaxBackoff: 250 * time.Millisecond, + MaxBackoff: 10 * time.Second, }) var savedTxn *kv.Txn txnErr := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { From 4ace523698de7b4b046dc70e8086deb97a6b46ee Mon Sep 17 00:00:00 2001 From: Herko Lategan Date: Fri, 13 Dec 2024 11:53:45 +0000 Subject: [PATCH 2/2] roachprod: respect AWS_PROFILE env var Previously, if the AWS_PROFILE env var was set, roachprod did not forward it to the `aws cli`. This change automatically sets the profile to the AWS_PROFILE env var if it is not specified via the `--aws-profile` roachprod flag. 
Epic: None Release note: None --- pkg/roachprod/vm/aws/aws.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/roachprod/vm/aws/aws.go b/pkg/roachprod/vm/aws/aws.go index c1d77002808d..1f0176e0d288 100644 --- a/pkg/roachprod/vm/aws/aws.go +++ b/pkg/roachprod/vm/aws/aws.go @@ -496,7 +496,7 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) { // ConfigureClusterFlags implements vm.ProviderOpts. func (o *ProviderOpts) ConfigureClusterFlags(flags *pflag.FlagSet, _ vm.MultipleProjectsOption) { - flags.StringVar(&providerInstance.Profile, ProviderName+"-profile", providerInstance.Profile, + flags.StringVar(&providerInstance.Profile, ProviderName+"-profile", os.Getenv("AWS_PROFILE"), "Profile to manage cluster in") configFlagVal := awsConfigValue{awsConfig: *DefaultConfig} providerInstance.Config = &configFlagVal.awsConfig