diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_test.go b/pkg/kv/kvserver/batcheval/cmd_lease_test.go index 29f3c2639028..3c33075afc00 100644 --- a/pkg/kv/kvserver/batcheval/cmd_lease_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_lease_test.go @@ -15,14 +15,12 @@ import ( "testing" "time" - "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/testutils" - "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -30,96 +28,6 @@ import ( "github.com/stretchr/testify/require" ) -// TestLeaseTransferWithPipelinedWrite verifies that pipelined writes -// do not cause retry errors to be leaked to clients when the error -// can be handled internally. Pipelining dissociates a write from its -// caller, so the retries of internally-generated errors (specifically -// out-of-order lease indexes) must be retried below that level. -// -// This issue was observed in practice to affect the first insert -// after table creation with high probability. -func TestLeaseTransferWithPipelinedWrite(t *testing.T) { - defer leaktest.AfterTest(t)() - defer log.Scope(t).Close(t) - - ctx := context.Background() - - tc := serverutils.StartNewTestCluster(t, 3, base.TestClusterArgs{}) - defer tc.Stopper().Stop(ctx) - - db := tc.ServerConn(0) - - for iter := 0; iter < 100; iter++ { - log.Infof(ctx, "iter %d", iter) - if _, err := db.ExecContext(ctx, "drop table if exists test"); err != nil { - t.Fatal(err) - } - if _, err := db.ExecContext(ctx, "create table test (a int, b int, primary key (a, b))"); err != nil { - t.Fatal(err) - } - - workerErrCh := make(chan error, 1) - go func() { - workerErrCh <- func() error { - for i := 0; i < 1; i++ { - tx, err := db.BeginTx(ctx, nil) - if err != nil { - return err - } - defer func() { - if tx != nil { - if err := tx.Rollback(); err != nil { - log.Warningf(ctx, "error rolling back: %+v", err) - } - } - }() - // Run two inserts in a transaction to ensure that we have - // pipelined writes that cannot be retried at the SQL layer - // due to the first-statement rule. - if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 1); err != nil { - return err - } - if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 2); err != nil { - return err - } - if err := tx.Commit(); err != nil { - return err - } - tx = nil - } - return nil - }() - }() - - // TODO(bdarnell): This test reliably reproduced the issue when - // introduced, because table creation causes splits and repeated - // table creation leads to lease transfers due to rebalancing. - // This is a subtle thing to rely on and the test might become - // more reliable if we ran more iterations in the worker goroutine - // and added a second goroutine to explicitly transfer leases back - // and forth. - - select { - case <-time.After(15 * time.Second): - // TODO(bdarnell): The test seems flaky under stress with a 5s - // timeout. Why? I'm giving it a high timeout since hanging - // isn't a failure mode we're particularly concerned about here, - // but it shouldn't be taking this long even with stress. - t.Fatal("timed out") - case err := <-workerErrCh: - if err != nil { - // We allow the transaction to run into an aborted error due to a lease - // transfer when it attempts to create its transaction record. This it - // outside of the focus of this test. - okErr := testutils.IsError(err, roachpb.ABORT_REASON_NEW_LEASE_PREVENTS_TXN.String()) - if !okErr { - t.Fatalf("worker failed: %+v", err) - } - } - } - } -} - func TestLeaseCommandLearnerReplica(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)