Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.2: kvcoord: disable fatal assertion on transaction commit sanity check #114981

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions pkg/kv/kvclient/kvcoord/replayed_commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (f *interceptingTransport) SendNext(
// in which DistSender retries an (unbeknownst to it) successful EndTxn(commit=true)
// RPC. It documents that this triggers an assertion failure in TxnCoordSender.
//
// See: https://github.com/cockroachdb/cockroach/issues/67765
// See: https://github.com/cockroachdb/cockroach/issues/103817
func TestCommitSanityCheckAssertionFiresOnUndetectedAmbiguousCommit(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down Expand Up @@ -88,8 +88,6 @@ func TestCommitSanityCheckAssertionFiresOnUndetectedAmbiguousCommit(t *testing.T
},
}, nil
},
// Turn the assertion into an error returned via the txn.
DisableCommitSanityCheck: true,
}

tc := testcluster.StartTestCluster(t, 1, args)
Expand Down
4 changes: 0 additions & 4 deletions pkg/kv/kvclient/kvcoord/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@ type ClientTestingKnobs struct {
// only applies to requests sent with the LEASEHOLDER routing policy.
DontReorderReplicas bool

// DisableCommitSanityCheck allows "setting" the DisableCommitSanityCheck to
// true without actually overriding the variable.
DisableCommitSanityCheck bool

// CommitWaitFilter allows tests to instrument the beginning of a transaction
// commit wait sleep.
CommitWaitFilter func()
Expand Down
37 changes: 12 additions & 25 deletions pkg/kv/kvclient/kvcoord/txn_coord_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
Expand All @@ -33,12 +32,6 @@ const (
OpTxnCoordSender = "txn coordinator send"
)

// DisableCommitSanityCheck allows opting out of a fatal assertion error that was observed in the wild
// and for which a root cause is not yet available.
//
// See: https://github.com/cockroachdb/cockroach/pull/73512.
var DisableCommitSanityCheck = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_COMMIT_SANITY_CHECK", false)

// txnState represents states relating to whether an EndTxn request needs
// to be sent.
//
Expand Down Expand Up @@ -945,16 +938,14 @@ func (tc *TxnCoordSender) updateStateLocked(
// encounter such errors. Marking a transaction as explicitly-committed can also
// encounter these errors, but those errors don't make it to the TxnCoordSender.
//
// Returns the passed-in error or fatals (depending on DisableCommitSanityCheck
// env var), wrapping the input error in case of an assertion violation.
// Wraps the error in case of an assertion violation, otherwise returns as-is.
//
// The assertion is known to have failed in the wild, see:
// https://github.com/cockroachdb/cockroach/issues/67765
// This checks for the occurence of a known issue involving ambiguous write
// errors that occur alongside commits, which may race with transaction
// recovery requests started by contending operations.
// https://github.com/cockroachdb/cockroach/issues/103817
func sanityCheckErrWithTxn(
ctx context.Context,
pErrWithTxn *roachpb.Error,
ba roachpb.BatchRequest,
knobs *ClientTestingKnobs,
ctx context.Context, pErrWithTxn *roachpb.Error, ba roachpb.BatchRequest, _ *ClientTestingKnobs,
) error {
txn := pErrWithTxn.GetTxn()
if txn.Status != roachpb.COMMITTED {
Expand All @@ -967,24 +958,20 @@ func sanityCheckErrWithTxn(
return nil
}

// Finding out about our transaction being committed indicates a serious bug.
// Requests are not supposed to be sent on transactions after they are
// committed.
// Finding out about our transaction being committed indicates a serious but
// known bug. Requests are not supposed to be sent on transactions after they
// are committed.
err := errors.Wrapf(pErrWithTxn.GoError(),
"transaction unexpectedly committed, ba: %s. txn: %s",
ba, pErrWithTxn.GetTxn(),
)
err = errors.WithAssertionFailure(
errors.WithIssueLink(err, errors.IssueLink{
IssueURL: "https://github.com/cockroachdb/cockroach/issues/67765",
IssueURL: "https://github.com/cockroachdb/cockroach/issues/103817",
Detail: "you have encountered a known bug in CockroachDB, please consider " +
"reporting on the Github issue or reach out via Support. " +
"This assertion can be disabled by setting the environment variable " +
"COCKROACH_DISABLE_COMMIT_SANITY_CHECK=true",
"reporting on the Github issue or reach out via Support.",
}))
if !DisableCommitSanityCheck && !knobs.DisableCommitSanityCheck {
log.Fatalf(ctx, "%s", err)
}
log.Warningf(ctx, "%v", err)
return err
}

Expand Down