Skip to content

Commit

Permalink
sql: emit point deletes during delete fastpath
Browse files Browse the repository at this point in the history
Previously, the "deleteRange" SQL operator, which is meant to be a
fast-path for cases in which an entire range of keys can be deleted,
always did what it said: emitted DeleteRange KV operations. This
precludes a crucial optimization: sending point deletes when the list of
deleted keys is exactly known.

For example, a query like `DELETE FROM kv WHERE k = 10000` uses the
"fast path" delete, since it has a contiguous set of keys to delete, and
it doesn't need to know the values that were deleted. But, in this case,
the performance is actually worse if we use a DeleteRange KV operation
for various reasons (see #53939), because:

- ranged KV writes (DeleteRangeRequest) cannot be pipelined because an
  enumeration of the intents that they will leave cannot be known ahead
  of time. They must therefore perform evaluation and replication synchronously.
- ranged KV writes (DeleteRangeRequest) result in ranged intent
  resolution, which is less efficient, especially until we re-enable
  time-bound iterators.

The reason we couldn't previously emit point deletes in this case is
that SQL needs to know whether it deleted something or not. This means
we can't do a "blind put" of a deletion: we need to actually understand
whether there was something that we were "overwriting" with our delete.

This commit adds an optional "return key" parameter to DelRequest, which
returns true if a value was actually deleted.

Additionally, the deleteRange SQL operator detects when it can emit
single-key deletes, and does so.

Release note (performance improvement): point deletes in SQL are now
more efficient during concurrent workloads.
  • Loading branch information
jordanlewis committed Apr 15, 2021
1 parent 4d987b3 commit 7b833ae
Show file tree
Hide file tree
Showing 16 changed files with 655 additions and 487 deletions.
23 changes: 20 additions & 3 deletions pkg/kv/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,13 @@ func (b *Batch) fillResults(ctx context.Context) {
}
}
case *roachpb.DeleteRequest:
row := &result.Rows[k]
row.Key = []byte(args.(*roachpb.DeleteRequest).Key)
deleteRequest := args.(*roachpb.DeleteRequest)
if deleteRequest.ReturnKey && result.Err == nil {
resp := reply.(*roachpb.DeleteResponse)
if resp.FoundKey {
result.Keys = []roachpb.Key{deleteRequest.Key}
}
}
case *roachpb.DeleteRangeRequest:
if result.Err == nil {
result.Keys = reply.(*roachpb.DeleteRangeResponse).Keys
Expand Down Expand Up @@ -612,12 +617,24 @@ func (b *Batch) Del(keys ...interface{}) {
b.initResult(0, len(keys), notRaw, err)
return
}
reqs = append(reqs, roachpb.NewDelete(k))
reqs = append(reqs, roachpb.NewDelete(k, false /* returnKey */))
}
b.appendReqs(reqs...)
b.initResult(len(reqs), len(reqs), notRaw, nil)
}

// DelKey is like Del but it takes a single key, and allows choosing whether or
// not to return whether a key was deleted via the command.
func (b *Batch) DelKey(key interface{}, returnKey bool) {
k, err := marshalKey(key)
if err != nil {
b.initResult(0, 1, notRaw, err)
return
}
b.appendReqs(roachpb.NewDelete(k, returnKey))
b.initResult(0, 1, notRaw, err)
}

// DelRange deletes the rows between begin (inclusive) and end (exclusive).
//
// A new result will be appended to the batch which will contain 0 rows and
Expand Down
3 changes: 2 additions & 1 deletion pkg/kv/kvclient/kvcoord/dist_sender.go
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,8 @@ func (ds *DistSender) initAndVerifyBatch(
inner := req.GetInner()
switch inner.(type) {
case *roachpb.ScanRequest, *roachpb.ResolveIntentRangeRequest,
*roachpb.DeleteRangeRequest, *roachpb.RevertRangeRequest, *roachpb.ExportRequest:
*roachpb.DeleteRequest, *roachpb.DeleteRangeRequest,
*roachpb.RevertRangeRequest, *roachpb.ExportRequest:
// Accepted forward range requests.
if isReverse {
return roachpb.NewErrorf("batch with limit contains both forward and reverse scans")
Expand Down
13 changes: 12 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,18 @@ func Delete(
args := cArgs.Args.(*roachpb.DeleteRequest)
h := cArgs.Header

err := storage.MVCCDelete(ctx, readWriter, cArgs.Stats, args.Key, h.Timestamp, h.Txn)
var err error
if args.ReturnKey {
reply := resp.(*roachpb.DeleteResponse)
reply.FoundKey, err = storage.MVCCDeleteReturningExistence(
ctx, readWriter, cArgs.Stats, args.Key, h.Timestamp, h.Txn)
if err != nil {
return result.Result{}, err
}
} else {
err = storage.MVCCDelete(ctx, readWriter, cArgs.Stats, args.Key, h.Timestamp, h.Txn)
}

// NB: even if MVCC returns an error, it may still have written an intent
// into the batch. This allows callers to consume errors like WriteTooOld
// without re-evaluating the batch. This behavior isn't particularly
Expand Down
3 changes: 2 additions & 1 deletion pkg/roachpb/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -1074,11 +1074,12 @@ func NewInitPut(key Key, value Value, failOnTombstones bool) Request {
}

// NewDelete returns a Request initialized to delete the value at key.
func NewDelete(key Key) Request {
func NewDelete(key Key, returnKey bool) Request {
return &DeleteRequest{
RequestHeader: RequestHeader{
Key: key,
},
ReturnKey: returnKey,
}
}

Expand Down
985 changes: 528 additions & 457 deletions pkg/roachpb/api.pb.go

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions pkg/roachpb/api.proto
Original file line number Diff line number Diff line change
Expand Up @@ -264,11 +264,17 @@ message IncrementResponse {
// A DeleteRequest is the argument to the Delete() method.
message DeleteRequest {
RequestHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true];

// return the key that is deleted in the response.
bool return_key = 3;
}

// A DeleteResponse is the return value from the Delete() method.
message DeleteResponse {
ResponseHeader header = 1 [(gogoproto.nullable) = false, (gogoproto.embed) = true];

// True if return_key was set and there was a key that got deleted.
bool found_key = 2;
}

// A DeleteRangeRequest is the argument to the DeleteRange() method. It
Expand Down
13 changes: 10 additions & 3 deletions pkg/sql/delete_range.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,17 @@ func (d *deleteRangeNode) deleteSpans(params runParams, b *kv.Batch, spans roach
ctx := params.ctx
traceKV := params.p.ExtendedEvalContext().Tracing.KVTracingEnabled()
for _, span := range spans {
if traceKV {
log.VEventf(ctx, 2, "DelRange %s - %s", span.Key, span.EndKey)
if span.EndKey == nil {
if traceKV {
log.VEventf(ctx, 2, "Del %s", span.Key)
}
b.DelKey(span.Key, true /* returnKey */)
} else {
if traceKV {
log.VEventf(ctx, 2, "DelRange %s - %s", span.Key, span.EndKey)
}
b.DelRange(span.Key, span.EndKey, true /* returnKeys */)
}
b.DelRange(span.Key, span.EndKey, true /* returnKeys */)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ WHERE message LIKE '%r$rangeid: sending batch%'
----
dist sender send r37: sending batch 1 Scan to (n1,s1):1
dist sender send r37: sending batch 2 Put to (n1,s1):1
dist sender send r37: sending batch 1 DelRng, 1 EndTxn to (n1,s1):1
dist sender send r37: sending batch 1 Del, 1 EndTxn to (n1,s1):1

# Multi-row delete should auto-commit.
query B
Expand Down Expand Up @@ -745,7 +745,7 @@ WHERE message LIKE '%r$rangeid: sending batch%'
AND operation NOT LIKE '%async%'
----
dist sender send r37: sending batch 1 DelRng to (n1,s1):1
dist sender send r37: sending batch 1 DelRng to (n1,s1):1
dist sender send r37: sending batch 1 Del to (n1,s1):1
dist sender send r37: sending batch 1 Scan to (n1,s1):1
dist sender send r37: sending batch 1 Del, 1 EndTxn to (n1,s1):1

Expand Down
13 changes: 7 additions & 6 deletions pkg/sql/opt/exec/execbuilder/testdata/cascade
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ query T
SELECT message FROM [SHOW TRACE FOR SESSION]
WHERE message LIKE '%executing cascade %' OR message LIKE 'Del%'
----
DelRange /Table/53/1/"a-pk1" - /Table/53/1/"a-pk1"/#
Del /Table/53/1/"a-pk1"/0
executing cascade for constraint fk_delete_cascade_ref_a
Del /Table/54/1/"b1-pk1"/0
Del /Table/54/1/"b1-pk2"/0
Expand Down Expand Up @@ -315,7 +315,7 @@ query T
SELECT message FROM [SHOW TRACE FOR SESSION]
WHERE message LIKE '%executing cascade %' OR message LIKE 'Del%'
----
DelRange /Table/70/1/"a-pk1" - /Table/70/1/"a-pk1"/#
Del /Table/70/1/"a-pk1"/0
executing cascade for constraint fk_delete_cascade_ref_a
Del /Table/71/1/"b1-pk1"/0
Del /Table/71/1/"b1-pk2"/0
Expand Down Expand Up @@ -603,7 +603,7 @@ query T
SELECT message FROM [SHOW TRACE FOR SESSION]
WHERE message LIKE '%executing cascade %' OR message LIKE 'Del%'
----
DelRange /Table/92/1/"a-pk1" - /Table/92/1/"a-pk1"/#
Del /Table/92/1/"a-pk1"/0
executing cascade for constraint fk_delete_cascade_ref_a
Del /Table/93/1/"b1-pk1"/0
Del /Table/93/1/"b1-pk2"/0
Expand Down Expand Up @@ -798,7 +798,8 @@ subtest DelRange
statement ok
CREATE TABLE delrng1 (
p INT PRIMARY KEY,
data INT
data INT,
FAMILY (p, data)
);
CREATE TABLE delrng2 (
p INT,
Expand Down Expand Up @@ -841,10 +842,10 @@ vectorized: true
└── • fk-cascade
fk: fk_p_ref_delrng1

query T kvtrace(DelRange)
query T kvtrace(Del,DelRange)
DELETE FROM delrng1 WHERE p = 1
----
DelRange /Table/112/1/1 - /Table/112/1/1/#
Del /Table/112/1/1/0
DelRange /Table/113/1/1 - /Table/113/1/2
DelRange /Table/114/1/1 - /Table/114/1/2
DelRange /Table/115/1/1 - /Table/115/1/2
Expand Down
23 changes: 21 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,28 @@ SET tracing = on,kv; DELETE FROM a WHERE a = 5; SET tracing = off

query TT
SELECT operation, message FROM [SHOW KV TRACE FOR SESSION]
WHERE message LIKE '%DelRange%' OR message LIKE '%sending batch%'
WHERE message LIKE '%Del%' OR message LIKE '%sending batch%'
----
flow DelRange /Table/57/1/5 - /Table/57/1/5/#
flow Del /Table/57/1/5/0
dist sender send r37: sending batch 1 Del, 1 EndTxn to (n1,s1):1

# Ensure that we send DelRanges when doing a point delete operation on a table
# that has multiple column families.

statement ok
CREATE TABLE multicf (a INT PRIMARY KEY, b INT, FAMILY (a), FAMILY (b))

statement ok
SELECT * FROM multicf

statement ok
SET tracing = on,kv; DELETE FROM multicf WHERE a = 5; SET tracing = off

query TT
SELECT operation, message FROM [SHOW KV TRACE FOR SESSION]
WHERE message LIKE '%Del%' OR message LIKE '%sending batch%'
----
flow DelRange /Table/58/1/5 - /Table/58/1/5/#
dist sender send r37: sending batch 1 DelRng, 1 EndTxn to (n1,s1):1

# Test use of fast path when there are interleaved tables.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ INSERT INTO t57085_c VALUES (10, 1, true), (20, 1, false), (30, 2, true);
query T kvtrace
DELETE FROM t57085_p WHERE p = 1;
----
DelRange /Table/56/1/1 - /Table/56/1/1/#
Del /Table/56/1/1/0
Scan /Table/57/{1-2}
Del /Table/57/2/1/10/0
Del /Table/57/1/10/0
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/row/fk_spans.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func FKCheckSpan(
}
// If it is safe to split this lookup into multiple families, generate a point lookup for
// family 0. Because we are just checking for existence, we only need family 0.
if s.CanSplitSpanIntoFamilySpans(1 /* numNeededFamilies */, numCols, containsNull) {
if s.CanSplitSpanIntoFamilySpans(1 /* numNeededFamilies */, numCols, containsNull, false /* forDelete */) {
return s.SpanToPointSpan(span, 0), nil
}
return span, nil
Expand Down
27 changes: 20 additions & 7 deletions pkg/sql/span/span_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ func (s *Builder) SpanToPointSpan(span roachpb.Span, family descpb.FamilyID) roa
func (s *Builder) MaybeSplitSpanIntoSeparateFamilies(
appendTo roachpb.Spans, span roachpb.Span, prefixLen int, containsNull bool,
) roachpb.Spans {
if s.neededFamilies != nil && s.CanSplitSpanIntoFamilySpans(len(s.neededFamilies), prefixLen, containsNull) {
if s.neededFamilies != nil && s.CanSplitSpanIntoFamilySpans(len(s.neededFamilies), prefixLen, containsNull,
false /*forDelete*/) {
return rowenc.SplitRowKeyIntoFamilySpans(appendTo, span.Key, s.neededFamilies)
}
return append(appendTo, span)
Expand All @@ -179,7 +180,7 @@ func (s *Builder) MaybeSplitSpanIntoSeparateFamilies(
// CanSplitSpanIntoFamilySpans returns whether a span encoded with prefixLen keys and numNeededFamilies
// needed families can be safely split into 1 or more family specific spans.
func (s *Builder) CanSplitSpanIntoFamilySpans(
numNeededFamilies, prefixLen int, containsNull bool,
numNeededFamilies, prefixLen int, containsNull bool, forDelete bool,
) bool {
// We can only split a span into separate family specific point lookups if:

Expand Down Expand Up @@ -231,6 +232,20 @@ func (s *Builder) CanSplitSpanIntoFamilySpans(
}
}

// * If we're performing a delete...
if forDelete {
// * The table must have just one column family, since we need to delete
// all column families during delete operations.
if s.table.NumFamilies() > 1 {
return false
}
// * The table must not be interleaved, since deleting from an interleaved
// table may require a range delete.
if s.table.IsInterleaved() {
return false
}
}

// We've passed all the conditions, and should be able to safely split this
// span into multiple column-family-specific spans.
return true
Expand Down Expand Up @@ -302,13 +317,11 @@ func (s *Builder) appendSpansFromConstraintSpan(

// Optimization: for single row lookups on a table with one or more column
// families, only scan the relevant column families, and use GetRequests
// instead of ScanRequests when doing the column family fetches. This is
// disabled for deletions on tables with multiple column families to ensure
// that the entire row (all of its column families) is deleted.
// instead of ScanRequests when doing the column family fetches.

if needed.Len() > 0 && span.Key.Equal(span.EndKey) && !forDelete {
if needed.Len() > 0 && span.Key.Equal(span.EndKey) {
neededFamilyIDs := rowenc.NeededColumnFamilyIDs(needed, s.table, s.index)
if s.CanSplitSpanIntoFamilySpans(len(neededFamilyIDs), cs.StartKey().Length(), containsNull) {
if s.CanSplitSpanIntoFamilySpans(len(neededFamilyIDs), cs.StartKey().Length(), containsNull, forDelete) {
return rowenc.SplitRowKeyIntoFamilySpans(appendTo, span.Key, neededFamilyIDs), nil
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/span/span_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestSpanBuilderDoesNotSplitSystemTableFamilySpans(t *testing.T) {
systemschema.DescriptorTable.GetPrimaryIndex().IndexDesc())

if res := builder.CanSplitSpanIntoFamilySpans(
1, 1, false); res {
1, 1, false /* containsNull */, false /* forDelete */); res {
t.Errorf("expected the system table to not be splittable")
}
}
2 changes: 1 addition & 1 deletion pkg/sql/span_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func TestSpanBuilderCanSplitSpan(t *testing.T) {
}
builder := span.MakeBuilder(evalCtx, execCfg.Codec, desc, idx.IndexDesc())
if res := builder.CanSplitSpanIntoFamilySpans(
tc.numNeededFamilies, tc.prefixLen, tc.containsNull); res != tc.canSplit {
tc.numNeededFamilies, tc.prefixLen, tc.containsNull, false); res != tc.canSplit {
t.Errorf("expected result to be %v, but found %v", tc.canSplit, res)
}
})
Expand Down
21 changes: 21 additions & 0 deletions pkg/storage/mvcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,27 @@ func MVCCDelete(
return mvccPutUsingIter(ctx, rw, iter, ms, key, timestamp, noValue, txn, nil /* valueFn */)
}

// MVCCDeleteReturningExistence is like MVCCDelete, but it returns whether the key
// that was passed in had a value already in the database.
func MVCCDeleteReturningExistence(
ctx context.Context,
rw ReadWriter,
ms *enginepb.MVCCStats,
key roachpb.Key,
timestamp hlc.Timestamp,
txn *roachpb.Transaction,
) (foundKey bool, err error) {
iter := newMVCCIterator(rw, timestamp.IsEmpty(), IterOptions{Prefix: true})
defer iter.Close()

valueFn := func(value optionalValue) ([]byte, error) {
foundKey = len(value.RawBytes) > 0
return nil, nil
}
err = mvccPutUsingIter(ctx, rw, iter, ms, key, timestamp, noValue, txn, valueFn)
return foundKey, err
}

var noValue = roachpb.Value{}

// mvccPutUsingIter sets the value for a specified key using the provided
Expand Down

0 comments on commit 7b833ae

Please sign in to comment.