Skip to content

Commit

Permalink
storage: avoid blocking the command queue for scans with limits
Browse files Browse the repository at this point in the history
SQL has a tendancy to create scans which cover a range's entire key
span, though looking only to return a finite number of results. These
requests end up blocking writes in the command queue, which block
other reads, causing requsets to effectively serialize. In reality,
when there is a scan with a limit, the actual affected key span ends
up being a small subset of the range's key span.

This change creates a new "pre-evaluation" execution path for read-
only requests. When pre-evaluating, the batch still interacts with
the command queue, but it neither waits for pre-requisites nor causes
successor, dependent write batches from waiting on it. Instead, any
prereqs or dependents are tracked while the read-only batch is
immediately evaluated without blocking. On successful evaluation,
the actual span(s) covered and recorded in the read-only batch's
`BatchResponse` are compared to the pre-requisite and dependent
commands. If there is no overlap, the timestamp cache is updated and
the read-only batch response returned to the caller. If there is
overlap, then the read-only batch is re-executed, but with the
pre-evaluation option disabled, so that it proceeds to interact with
the command queue in the traditional fashion, waiting on prereqs
and making dependents wait on it.

Release note: improved performance on workloads which mix OLAP queries
with updates.
  • Loading branch information
spencerkimball committed Oct 25, 2018
1 parent 3ee3e10 commit c2d3a00
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 48 deletions.
45 changes: 37 additions & 8 deletions pkg/storage/command_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,12 @@ type CommandQueue struct {
}

type cmd struct {
id int64
key interval.Range
readOnly bool
timestamp hlc.Timestamp
debugInfo summaryWriter
id int64
key interval.Range
readOnly bool
preEvaluate bool // collects pre-requisites and dependents, instead of waiting
timestamp hlc.Timestamp
debugInfo summaryWriter

buffered bool // is this cmd buffered in readsBuffer
expanded bool // have the children been added
Expand All @@ -101,7 +102,10 @@ type cmd struct {
// in the prereq slice to avoid duplicates.
prereqIDs map[int64]struct{}

pending chan struct{} // closed when complete
// If pre-evaluating, collect commands which are prereqs or dependents.
prereqsAndDeps []*cmd

pending chan struct{} // closed when complete, or if pre-evaulating
}

// A summaryWriter is capable of writing a summary about itself. It is typically
Expand Down Expand Up @@ -277,6 +281,16 @@ func (c *cmd) ResolvePendingPrereq() {
}
}

// If the prereq is pre-evaluating, let it know that this command is
// a dependent.
if pre.preEvaluate {
pre.prereqsAndDeps = append(pre.prereqsAndDeps, c)
}
// If the command is pre-evaluating, keep track of prereqs.
if c.preEvaluate {
c.prereqsAndDeps = append(c.prereqsAndDeps, pre)
}

// Truncate the command's prerequisite list so that it no longer includes
// the first prerequisite. Before doing so, nil out prefix of slice to allow
// GC of the first command. Without this, large chunks of the dependency
Expand All @@ -289,7 +303,7 @@ func (c *cmd) ResolvePendingPrereq() {
delete(c.prereqIDs, pre.id)
}

// OptimisticallyResolvePrereqs removes all prerequisite in the cmd's prereq
// OptimisticallyResolvePrereqs removes all prerequisites in the cmd's prereq
// slice that have already finished without blocking on pending commands.
// Prerequisite commands that are still pending or that were canceled are left
// in the prereq slice.
Expand All @@ -313,6 +327,20 @@ func (c *cmd) OptimisticallyResolvePrereqs() {
(*c.prereqs) = (*c.prereqs)[:j]
}

// Overlaps returns whether the supplied range overlaps with any span
// in the command.
func (c *cmd) Overlaps(o interval.Range) bool {
if len(c.children) == 0 {
return interval.ExclusiveOverlapper.Overlap(o, c.key)
}
for _, child := range c.children {
if interval.ExclusiveOverlapper.Overlap(o, child.key) {
return true
}
}
return false
}

// NewCommandQueue returns a new command queue. The boolean specifies whether
// to enable the covering span optimization. With this optimization, whenever
// a command consisting of multiple spans is added, a covering span is computed
Expand Down Expand Up @@ -724,7 +752,7 @@ func (o *overlapHeap) PopOverlap() *cmd {
//
// Returns a nil `cmd` when no spans are given.
func (cq *CommandQueue) add(
readOnly bool, timestamp hlc.Timestamp, prereqs []*cmd, spans []roachpb.Span,
readOnly, preEvaluate bool, timestamp hlc.Timestamp, prereqs []*cmd, spans []roachpb.Span,
) *cmd {
if len(spans) == 0 {
return nil
Expand Down Expand Up @@ -766,6 +794,7 @@ func (cq *CommandQueue) add(
cmd.id = cq.nextID()
cmd.key = coveringSpan.AsRange()
cmd.readOnly = readOnly
cmd.preEvaluate = preEvaluate
cmd.timestamp = timestamp
cmd.prereqsBuf = prereqs
cmd.prereqs = &cmd.prereqsBuf
Expand Down
50 changes: 25 additions & 25 deletions pkg/storage/command_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func getPrereqs(cq *CommandQueue, from, to roachpb.Key, readOnly bool) []*cmd {
}

func add(cq *CommandQueue, from, to roachpb.Key, readOnly bool, prereqs []*cmd) *cmd {
return cq.add(readOnly, zeroTS, prereqs, []roachpb.Span{{Key: from, EndKey: to}})
return cq.add(readOnly, false /* preEvaluate */, zeroTS, prereqs, []roachpb.Span{{Key: from, EndKey: to}})
}

func getPrereqsAndAdd(cq *CommandQueue, from, to roachpb.Key, readOnly bool) ([]*cmd, *cmd) {
Expand Down Expand Up @@ -292,7 +292,7 @@ func TestCommandQueueWithoutCoveringOptimization(t *testing.T) {
c := roachpb.Span{Key: roachpb.Key("c")}

{
cmd := cq.add(false, zeroTS, nil, []roachpb.Span{a, b})
cmd := cq.add(false, false, zeroTS, nil, []roachpb.Span{a, b})
if !cmd.expanded {
t.Errorf("expected non-expanded command, not %+v", cmd)
}
Expand All @@ -306,7 +306,7 @@ func TestCommandQueueWithoutCoveringOptimization(t *testing.T) {
}

{
cmd := cq.add(false, zeroTS, nil, []roachpb.Span{c})
cmd := cq.add(false, false, zeroTS, nil, []roachpb.Span{c})
if cmd.expanded {
t.Errorf("expected unexpanded command, not %+v", cmd)
}
Expand Down Expand Up @@ -360,16 +360,16 @@ func TestCommandQueueIssue6495(t *testing.T) {
}

cq.getPrereqs(false, zeroTS, spans1998)
cmd1998 := cq.add(false, zeroTS, nil, spans1998)
cmd1998 := cq.add(false, false, zeroTS, nil, spans1998)

cq.getPrereqs(true, zeroTS, spans1999)
cmd1999 := cq.add(true, zeroTS, nil, spans1999)
cmd1999 := cq.add(true, false, zeroTS, nil, spans1999)

cq.getPrereqs(true, zeroTS, spans2002)
cq.add(true, zeroTS, nil, spans2002)
cq.add(true, false, zeroTS, nil, spans2002)

cq.getPrereqs(false, zeroTS, spans2003)
cq.add(false, zeroTS, nil, spans2003)
cq.add(false, false, zeroTS, nil, spans2003)

cq.remove(cmd1998)
cq.remove(cmd1999)
Expand Down Expand Up @@ -404,27 +404,27 @@ func TestCommandQueueTimestamps(t *testing.T) {
mkSpan("e", "g"),
}

cmd1 := cq.add(true, makeTS(1, 0), nil, spans1)
cmd1 := cq.add(true, false, makeTS(1, 0), nil, spans1)

pre2 := cq.getPrereqs(true, makeTS(2, 0), spans2)
if pre2 != nil {
t.Errorf("expected nil prereq slice; got %+v", pre2)
}
cmd2 := cq.add(false, makeTS(2, 0), pre2, spans2)
cmd2 := cq.add(false, false, makeTS(2, 0), pre2, spans2)

pre3 := cq.getPrereqs(true, makeTS(3, 0), spans3)
if pre3 != nil {
t.Errorf("expected nil prereq slice; got %+v", pre3)
}
cmd3 := cq.add(false, makeTS(3, 0), pre3, spans3)
cmd3 := cq.add(false, false, makeTS(3, 0), pre3, spans3)

// spans4 should wait on spans3.children[1].
pre4 := cq.getPrereqs(true, makeTS(4, 0), spans4)
expPre := []*cmd{&cmd3.children[1]}
if !reflect.DeepEqual(expPre, pre4) {
t.Errorf("expected prereq commands %+v; got %+v", expPre, pre4)
}
cmd4 := cq.add(true, makeTS(4, 0), pre4, spans4)
cmd4 := cq.add(true, false, makeTS(4, 0), pre4, spans4)

// Verify that an earlier writer for whole span waits on all commands.
pre5 := cq.getPrereqs(false, makeTS(0, 1), []roachpb.Span{mkSpan("a", "g")})
Expand Down Expand Up @@ -500,14 +500,14 @@ func TestCommandQueueEnclosedRead(t *testing.T) {
}

// Add command 1.
cmd1 := cq.add(true, makeTS(2, 0), nil, spans1)
cmd1 := cq.add(true, false, makeTS(2, 0), nil, spans1)

// Add command 2.
pre := cq.getPrereqs(false, makeTS(3, 0), spans2)
if expPre := []*cmd(nil); !reflect.DeepEqual(expPre, pre) {
t.Errorf("expected prereq commands %+v; got %+v", expPre, pre)
}
cmd2 := cq.add(false, makeTS(3, 0), pre, spans2)
cmd2 := cq.add(false, false, makeTS(3, 0), pre, spans2)

// Add command 3.
pre = cq.getPrereqs(false, makeTS(1, 0), spansCandidate)
Expand Down Expand Up @@ -540,14 +540,14 @@ func TestCommandQueueEnclosedWrite(t *testing.T) {
}

// Add command 1.
cmd1 := cq.add(false, makeTS(3, 0), nil, spans1)
cmd1 := cq.add(false, false, makeTS(3, 0), nil, spans1)

// Add command 2.
pre := cq.getPrereqs(true, makeTS(2, 0), spans2)
if expPre := []*cmd(nil); !reflect.DeepEqual(expPre, pre) {
t.Errorf("expected prereq commands %+v; got %+v", expPre, pre)
}
cmd2 := cq.add(true, makeTS(2, 0), nil, spans2)
cmd2 := cq.add(true, false, makeTS(2, 0), nil, spans2)

// Add command 3.
pre = cq.getPrereqs(false, makeTS(1, 0), spansCandidate)
Expand Down Expand Up @@ -576,10 +576,10 @@ func TestCommandQueueTimestampsEmpty(t *testing.T) {
mkSpan("h", ""),
}

cmd1 := cq.add(true, zeroTS, nil, spansR)
cmd2 := cq.add(false, zeroTS, nil, spansW)
cmd3 := cq.add(true, makeTS(1, 0), nil, spansRTS)
cmd4 := cq.add(false, makeTS(1, 0), nil, spansWTS)
cmd1 := cq.add(true, false, zeroTS, nil, spansR)
cmd2 := cq.add(false, false, zeroTS, nil, spansW)
cmd3 := cq.add(true, false, makeTS(1, 0), nil, spansRTS)
cmd4 := cq.add(false, false, makeTS(1, 0), nil, spansWTS)

// A writer will depend on both zero-timestamp spans.
pre := cq.getPrereqs(false, makeTS(1, 0), []roachpb.Span{mkSpan("a", "f")})
Expand Down Expand Up @@ -677,7 +677,7 @@ func TestCommandQueueTransitiveDependencies(t *testing.T) {
{
cq := NewCommandQueue(true)

cq.add(ops1.readOnly, ops1.ts, nil, ops1.spans)
cq.add(ops1.readOnly, false, ops1.ts, nil, ops1.spans)

pre3 := cq.getPrereqs(ops3.readOnly, ops3.ts, ops3.spans)
if expectDependency := len(pre3) > 0; !expectDependency {
Expand All @@ -697,12 +697,12 @@ func TestCommandQueueTransitiveDependencies(t *testing.T) {
cq := NewCommandQueue(true)

// Add command 1.
cmd1 := cq.add(ops1.readOnly, ops1.ts, nil, ops1.spans)
cmd1 := cq.add(ops1.readOnly, false, ops1.ts, nil, ops1.spans)

// Add command 2, taking note of whether it depends on command 1.
pre2 := cq.getPrereqs(ops2.readOnly, ops2.ts, ops2.spans)
dependency2to1 := len(pre2) > 0
cmd2 := cq.add(ops2.readOnly, ops2.ts, pre2, ops2.spans)
cmd2 := cq.add(ops2.readOnly, false, ops2.ts, pre2, ops2.spans)

// Add command 3, taking note of whether it depends on command 1
// or on command 2.
Expand Down Expand Up @@ -755,7 +755,7 @@ func TestCommandQueueGetSnapshotWithChildren(t *testing.T) {
cmd2 := add(cq, roachpb.Key("a"), nil, true, []*cmd{cmd1})
// the following creates a node with two children because it has two spans
// only the children show up in the snapshot.
cq.add(true, zeroTS, []*cmd{cmd2}, []roachpb.Span{
cq.add(true, false, zeroTS, []*cmd{cmd2}, []roachpb.Span{
{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")},
{Key: roachpb.Key("d"), EndKey: roachpb.Key("f")},
})
Expand Down Expand Up @@ -818,7 +818,7 @@ func BenchmarkCommandQueueGetPrereqsAllReadOnly(b *testing.B) {
EndKey: roachpb.Key("aaaaaaaaab"),
}}
for i := 0; i < size; i++ {
cq.add(true, zeroTS, nil, spans)
cq.add(true, false, zeroTS, nil, spans)
}

b.ResetTimer()
Expand Down Expand Up @@ -853,7 +853,7 @@ func BenchmarkCommandQueueReadWriteMix(b *testing.B) {
var cmd *cmd
readOnly := j%(readsPerWrite+1) != 0
prereqs := cq.getPrereqs(readOnly, zeroTS, spans)
cmd = cq.add(readOnly, zeroTS, prereqs, spans)
cmd = cq.add(readOnly, false, zeroTS, prereqs, spans)
if len(liveCmdQueue) == cap(liveCmdQueue) {
cq.remove(<-liveCmdQueue)
}
Expand Down
Loading

0 comments on commit c2d3a00

Please sign in to comment.