Skip to content

Commit

Permalink
kv: respect exhausted key limit during ranged intent resolution
Browse files Browse the repository at this point in the history
Fixes cockroachdb#47471.
Fixes cockroachdb#40935.

This commit fixes a long-standing bug where ranged intent resolution
would not respect the MaxSpanRequestKeys set on a batch once the limit
had already been exhausted by other requests in the same batch. Instead
of treating the limit as exhausted, ranged intent resolution would
consider the limit nonexistent (unbounded). This bug was triggering an
assertion in DistSender. We became more likely to hit this issue in
v20.1 because we started performing ranged intent resolution more often
due to implicit SELECT FOR UPDATE.

This commit fixes the bug in two ways:
1. it addresses the root cause, updating MVCCResolveWriteIntentRangeUsingIter
  to properly respect the limit placed on the request when it is exhauted.
2. it disables the assertion in DistSender when it detects that we are hitting
  this bug. This ensures that we don't hit the assertion in mixed version
  clusters (see cockroachdb#40935). By the time we're in DistSender, the damage is
  already done and has already potentially resulted in a large Raft entry.
  Maintaining the assertion doesn't do us any good.

Release notes (bug fix): a bug that could could trigger an assertion
with the text "received X results, limit was Y" has been fixed. The
underlying bug was only performance related and could not cause
user-visible correctness violations.

Release justification: fixes a medium-priority bug in existing
functionality. The bug could result in an assertion failure and a node
crashing. Even though this was an old bug (present in many releases
before v20.1), it became a lot easier to hit in v20.1 because we started
performing ranged intent resolution more often due to implicit SELECT
FOR UPDATE.
  • Loading branch information
nvanbenschoten committed Apr 15, 2020
1 parent 136492e commit b70f635
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 13 deletions.
17 changes: 15 additions & 2 deletions pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -1247,8 +1247,21 @@ func (ds *DistSender) divideAndSendBatchToRanges(
// passed recursively to further divideAndSendBatchToRanges() calls.
if ba.MaxSpanRequestKeys > 0 {
if replyResults > ba.MaxSpanRequestKeys {
log.Fatalf(ctx, "received %d results, limit was %d",
replyResults, ba.MaxSpanRequestKeys)
// NOTE: v19.2 and below have a bug where MaxSpanRequestKeys
// is not respected by ResolveIntentRangeRequest once the
// limit has already been exhausted by the batch. This is
// mostly harmless (or at least, the damage has already been
// done by this point and resulted in a large Raft entry)
// and has been fixed in v20.1+, so don't bother hitting the
// assertion.
//
// TODO(nvanbenschoten): remove this hack in v20.2.
if _, ok := ba.GetArg(roachpb.ResolveIntentRange); ok {
replyResults = ba.MaxSpanRequestKeys
} else {
log.Fatalf(ctx, "received %d results, limit was %d",
replyResults, ba.MaxSpanRequestKeys)
}
}
ba.MaxSpanRequestKeys -= replyResults
// Exiting; any missing responses will be filled in via defer().
Expand Down
78 changes: 70 additions & 8 deletions pkg/kv/kvserver/replica_evaluate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,55 @@ func TestEvaluateBatch(t *testing.T) {
verifyAcquiredLocks(t, r, lock.Replicated, []string(nil)...)
},
},
//
// Test suite for ResolveIntentRange with and without limits.
//
{
// Three range intent resolutions that observe 3, 1, and 0 intent,
// respectively. All intents should be resolved.
name: "ranged intent resolution",
setup: func(t *testing.T, d *data) {
writeABCDEFIntents(t, d, &txn)
d.ba.Add(resolveIntentRangeArgsString("a", "d", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("e", "f", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("h", "j", txn.TxnMeta, roachpb.COMMITTED))
},
check: func(t *testing.T, r resp) {
verifyNumKeys(t, r, 3, 1, 0)
verifyResumeSpans(t, r, "", "", "")
},
},
{
// Resolving intents with a giant limit should resolve everything.
name: "ranged intent resolution with giant MaxSpanRequestKeys",
setup: func(t *testing.T, d *data) {
writeABCDEFIntents(t, d, &txn)
d.ba.Add(resolveIntentRangeArgsString("a", "d", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("e", "f", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("h", "j", txn.TxnMeta, roachpb.COMMITTED))
d.ba.MaxSpanRequestKeys = 100000
},
check: func(t *testing.T, r resp) {
verifyNumKeys(t, r, 3, 1, 0)
verifyResumeSpans(t, r, "", "", "")
},
},
{
// A batch limited to resolve only up to 3 keys should respect that
// limit. The limit is saturated by the first request in the batch.
name: "ranged intent resolution with MaxSpanRequestKeys=3",
setup: func(t *testing.T, d *data) {
writeABCDEFIntents(t, d, &txn)
d.ba.Add(resolveIntentRangeArgsString("a", "d", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("e", "f", txn.TxnMeta, roachpb.COMMITTED))
d.ba.Add(resolveIntentRangeArgsString("h", "j", txn.TxnMeta, roachpb.COMMITTED))
d.ba.MaxSpanRequestKeys = 3
},
check: func(t *testing.T, r resp) {
verifyNumKeys(t, r, 3, 0, 0)
verifyResumeSpans(t, r, "c\x00-d", "e-f", "h-j")
},
},
}

for _, tc := range tcs {
Expand Down Expand Up @@ -565,10 +614,14 @@ type testCase struct {
}

func writeABCDEF(t *testing.T, d *data) {
writeABCDEFIntents(t, d, nil /* txn */)
}

func writeABCDEFIntents(t *testing.T, d *data, txn *roachpb.Transaction) {
for _, k := range []string{"a", "b", "c", "d", "e", "f"} {
require.NoError(t, storage.MVCCPut(
context.Background(), d.eng, nil /* ms */, roachpb.Key(k), d.ba.Timestamp,
roachpb.MakeValueFromString("value-"+k), nil /* txn */))
roachpb.MakeValueFromString("value-"+k), txn))
}
}

Expand Down Expand Up @@ -607,17 +660,26 @@ func verifyScanResult(t *testing.T, r resp, keysPerResp ...[]string) {
}
}

func verifyNumKeys(t *testing.T, r resp, keysPerResp ...int) {
require.Nil(t, r.pErr)
require.NotNil(t, r.br)
require.Len(t, r.br.Responses, len(keysPerResp))
for i, keys := range keysPerResp {
actKeys := int(r.br.Responses[i].GetInner().Header().NumKeys)
require.Equal(t, keys, actKeys, "in response #%i", i+1)
}
}

func verifyResumeSpans(t *testing.T, r resp, resumeSpans ...string) {
for i, span := range resumeSpans {
if span == "" {
continue // don't check request
}
rs := r.br.Responses[i].GetInner().Header().ResumeSpan
var act string
if rs != nil {
act = fmt.Sprintf("%s-%s", string(rs.Key), string(rs.EndKey))
if span == "" {
require.Nil(t, rs)
} else {
require.NotNil(t, rs)
act := fmt.Sprintf("%s-%s", string(rs.Key), string(rs.EndKey))
require.Equal(t, span, act, "#%d", i+1)
}
require.Equal(t, span, act, "#%d", i+1)
}
}

Expand Down
10 changes: 10 additions & 0 deletions pkg/kv/kvserver/replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1756,6 +1756,16 @@ func queryIntentArgs(
}
}

func resolveIntentRangeArgsString(
s, e string, txn enginepb.TxnMeta, status roachpb.TransactionStatus,
) *roachpb.ResolveIntentRangeRequest {
return &roachpb.ResolveIntentRangeRequest{
RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(s), EndKey: roachpb.Key(e)},
IntentTxn: txn,
Status: status,
}
}

func internalMergeArgs(key []byte, value roachpb.Value) roachpb.MergeRequest {
return roachpb.MergeRequest{
RequestHeader: roachpb.RequestHeader{
Expand Down
13 changes: 11 additions & 2 deletions pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -3055,8 +3055,12 @@ func MVCCResolveWriteIntentRange(
// MVCCResolveWriteIntentRangeUsingIter commits or aborts (rolls back)
// the range of write intents specified by start and end keys for a
// given txn. ResolveWriteIntentRange will skip write intents of other
// txns. Returns the number of intents resolved and a resume span if
// the max keys limit was exceeded. A max of zero means unbounded.
// txns.
//
// Returns the number of intents resolved and a resume span if the max
// keys limit was exceeded. A max of zero means unbounded. A max of -1
// means resolve nothing and return the entire intent span as the resume
// span.
func MVCCResolveWriteIntentRangeUsingIter(
ctx context.Context,
rw ReadWriter,
Expand All @@ -3065,6 +3069,11 @@ func MVCCResolveWriteIntentRangeUsingIter(
intent roachpb.LockUpdate,
max int64,
) (int64, *roachpb.Span, error) {
if max < 0 {
resumeSpan := intent.Span // don't inline or `intent` would escape to heap
return 0, &resumeSpan, nil
}

encKey := MakeMVCCMetadataKey(intent.Key)
encEndKey := MakeMVCCMetadataKey(intent.EndKey)
nextKey := encKey
Expand Down
3 changes: 2 additions & 1 deletion pkg/storage/mvcc_history_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ var commands = map[string]cmd{
"txn_update": {typTxnUpdate, cmdTxnUpdate},

"resolve_intent": {typDataUpdate, cmdResolveIntent},
"check_intent": {typReadOnly, cmdCheckIntent},
// TODO(nvanbenschoten): test "resolve_intent_range".
"check_intent": {typReadOnly, cmdCheckIntent},

"clear_range": {typDataUpdate, cmdClearRange},
"cput": {typDataUpdate, cmdCPut},
Expand Down

0 comments on commit b70f635

Please sign in to comment.