Skip to content

Commit

Permalink
Merge #107596
Browse files Browse the repository at this point in the history
107596: kvcoord: disable fatal assertion on transaction commit sanity check r=AlexTalks a=AlexTalks

As #103817 is now a known issue for which a root cause has been identified, the sanity check assertion in place here is disabled, with errors occuring due to the known bug properly linking to the issue ticket. While the transaction coordinator still performs sanity checks to ensure that operations cannot continue on an already-committed transaction, it will no longer log the error with severity `FATAL`, resulting in node crash, and will instead simply return the error.

Part of: #103817

Release note: None

Co-authored-by: Alex Sarkesian <[email protected]>
  • Loading branch information
craig[bot] and AlexTalks committed Jul 26, 2023
2 parents 389d3f0 + 1c906bb commit d93799f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 29 deletions.
4 changes: 1 addition & 3 deletions pkg/kv/kvclient/kvcoord/replayed_commit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (f *interceptingTransport) SendNext(
// in which DistSender retries an (unbeknownst to it) successful EndTxn(commit=true)
// RPC. It documents that this triggers an assertion failure in TxnCoordSender.
//
// See: https://github.com/cockroachdb/cockroach/issues/67765
// See: https://github.com/cockroachdb/cockroach/issues/103817
func TestCommitSanityCheckAssertionFiresOnUndetectedAmbiguousCommit(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down Expand Up @@ -89,8 +89,6 @@ func TestCommitSanityCheckAssertionFiresOnUndetectedAmbiguousCommit(t *testing.T
},
}, nil
},
// Turn the assertion into an error returned via the txn.
DisableCommitSanityCheck: true,
}

tc := testcluster.StartTestCluster(t, 1, args)
Expand Down
4 changes: 0 additions & 4 deletions pkg/kv/kvclient/kvcoord/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ type ClientTestingKnobs struct {
// only applies to requests sent with the LEASEHOLDER routing policy.
DontReorderReplicas bool

// DisableCommitSanityCheck allows "setting" the DisableCommitSanityCheck to
// true without actually overriding the variable.
DisableCommitSanityCheck bool

// CommitWaitFilter allows tests to instrument the beginning of a transaction
// commit wait sleep.
CommitWaitFilter func()
Expand Down
34 changes: 12 additions & 22 deletions pkg/kv/kvclient/kvcoord/txn_coord_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/isolation"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
Expand All @@ -35,12 +34,6 @@ const (
OpTxnCoordSender = "txn coordinator send"
)

// DisableCommitSanityCheck allows opting out of a fatal assertion error that was observed in the wild
// and for which a root cause is not yet available.
//
// See: https://github.com/cockroachdb/cockroach/pull/73512.
var DisableCommitSanityCheck = envutil.EnvOrDefaultBool("COCKROACH_DISABLE_COMMIT_SANITY_CHECK", false)

// txnState represents states relating to whether an EndTxn request needs
// to be sent.
//
Expand Down Expand Up @@ -969,13 +962,14 @@ func (tc *TxnCoordSender) updateStateLocked(
// encounter such errors. Marking a transaction as explicitly-committed can also
// encounter these errors, but those errors don't make it to the TxnCoordSender.
//
// Returns the passed-in error or fatals (depending on DisableCommitSanityCheck
// env var), wrapping the input error in case of an assertion violation.
// Wraps the error in case of an assertion violation, otherwise returns as-is.
//
// The assertion is known to have failed in the wild, see:
// https://github.com/cockroachdb/cockroach/issues/67765
// This checks for the occurence of a known issue involving ambiguous write
// errors that occur alongside commits, which may race with transaction
// recovery requests started by contending operations.
// https://github.com/cockroachdb/cockroach/issues/103817
func sanityCheckErrWithTxn(
ctx context.Context, pErrWithTxn *kvpb.Error, ba *kvpb.BatchRequest, knobs *ClientTestingKnobs,
ctx context.Context, pErrWithTxn *kvpb.Error, ba *kvpb.BatchRequest, _ *ClientTestingKnobs,
) error {
txn := pErrWithTxn.GetTxn()
if txn.Status != roachpb.COMMITTED {
Expand All @@ -988,24 +982,20 @@ func sanityCheckErrWithTxn(
return nil
}

// Finding out about our transaction being committed indicates a serious bug.
// Requests are not supposed to be sent on transactions after they are
// committed.
// Finding out about our transaction being committed indicates a serious but
// known bug. Requests are not supposed to be sent on transactions after they
// are committed.
err := errors.Wrapf(pErrWithTxn.GoError(),
"transaction unexpectedly committed, ba: %s. txn: %s",
ba, pErrWithTxn.GetTxn(),
)
err = errors.WithAssertionFailure(
errors.WithIssueLink(err, errors.IssueLink{
IssueURL: "https://github.com/cockroachdb/cockroach/issues/67765",
IssueURL: "https://github.com/cockroachdb/cockroach/issues/103817",
Detail: "you have encountered a known bug in CockroachDB, please consider " +
"reporting on the Github issue or reach out via Support. " +
"This assertion can be disabled by setting the environment variable " +
"COCKROACH_DISABLE_COMMIT_SANITY_CHECK=true",
"reporting on the Github issue or reach out via Support.",
}))
if !DisableCommitSanityCheck && !knobs.DisableCommitSanityCheck {
log.Fatalf(ctx, "%s", err)
}
log.Warningf(ctx, "%v", err)
return err
}

Expand Down

0 comments on commit d93799f

Please sign in to comment.