Merge pull request #5446 from RaduBerinde/index-sel-limit

sql: prefer order-matching index if there is a limit

RaduBerinde committed Mar 22, 2016
2 parents 66fc410 + aa372ba commit 0f7642c

Showing 6 changed files with 127 additions and 49 deletions.
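
This change teaches index selection to prefer an index that matches the requested ordering, even when it is not covering, as long as the query carries a reasonably small limit (the new condition in initSelect requires limitCount <= 1000-limitOffset). For example, per the new testdata below, EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 now plans an index-join over the order-matching index t@c instead of a full scan of t@primary followed by a sort.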
2 changes: 1 addition & 1 deletion sql/backfill.go
@@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro
 		desc: *tableDesc,
 	}
 	scan.initDescDefaults()
-	rows := p.selectIndex(&selectNode{}, scan, nil, false)
+	rows := p.selectIndex(&selectNode{}, scan, nil, false, false)
 
 	// Construct a map from column ID to the index the value appears at within a
 	// row.
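
(The new trailing argument to selectIndex is preferOrderMatching; the backfill scan passes false to keep its existing behavior.)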
54 changes: 33 additions & 21 deletions sql/limit.go
@@ -25,60 +25,72 @@ import (
 	"github.com/cockroachdb/cockroach/util"
 )
 
-// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
-func (p *planner) limit(limit *parser.Limit, plan planNode) (planNode, error) {
+// evalLimit evaluates the Count and Offset fields. If Count is missing, the
+// value is MaxInt64. If Offset is missing, the value is 0
+func (p *planner) evalLimit(limit *parser.Limit) (count, offset int64, err error) {
+	count = math.MaxInt64
+	offset = 0
+
 	if limit == nil {
-		return plan, nil
+		return count, offset, nil
 	}
 
-	var count, offset int64
-
 	data := []struct {
-		name       string
-		src        parser.Expr
-		dst        *int64
-		defaultVal int64
+		name string
+		src  parser.Expr
+		dst  *int64
 	}{
-		{"LIMIT", limit.Count, &count, math.MaxInt64},
-		{"OFFSET", limit.Offset, &offset, 0},
+		{"LIMIT", limit.Count, &count},
+		{"OFFSET", limit.Offset, &offset},
 	}
 
 	for _, datum := range data {
-		if datum.src == nil {
-			*datum.dst = datum.defaultVal
-		} else {
+		if datum.src != nil {
 			if parser.ContainsVars(datum.src) {
-				return nil, util.Errorf("argument of %s must not contain variables", datum.name)
+				return 0, 0, util.Errorf("argument of %s must not contain variables", datum.name)
			}
 
 			normalized, err := p.parser.NormalizeExpr(p.evalCtx, datum.src)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 			dstDatum, err := normalized.Eval(p.evalCtx)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 
 			if dstDatum == parser.DNull {
-				*datum.dst = datum.defaultVal
+				// Use the default value.
 				continue
 			}
 
 			if dstDInt, ok := dstDatum.(parser.DInt); ok {
-				*datum.dst = int64(dstDInt)
+				val := int64(dstDInt)
+				if val < 0 {
+					return 0, 0, fmt.Errorf("negative value for %s", datum.name)
+				}
+				*datum.dst = val
 				continue
 			}
 
-			return nil, fmt.Errorf("argument of %s must be type %s, not type %s", datum.name, parser.DummyInt.Type(), dstDatum.Type())
+			return 0, 0, fmt.Errorf("argument of %s must be type %s, not type %s",
+				datum.name, parser.DummyInt.Type(), dstDatum.Type())
 		}
 	}
+	return count, offset, nil
+}
 
+// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
+func (p *planner) limit(count, offset int64, plan planNode) planNode {
+	if count == math.MaxInt64 && offset == 0 {
+		return plan
+	}
+
 	if count != math.MaxInt64 {
 		plan.SetLimitHint(offset+count, false /* hard */)
 	}
 
-	return &limitNode{planNode: plan, count: count, offset: offset}, nil
+	return &limitNode{planNode: plan, count: count, offset: offset}
 }
 
 type limitNode struct {
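
The refactoring splits LIMIT/OFFSET evaluation (evalLimit) from plan wrapping (limit) so that the evaluated values are available during index selection. A minimal sketch of the new call pattern, mirroring the call sites in sql/select.go below:

	// Evaluate LIMIT/OFFSET up front; count defaults to MaxInt64, offset to 0.
	count, offset, err := p.evalLimit(limit)
	if err != nil {
		return nil, roachpb.NewError(err)
	}
	// ... count and offset can now inform index selection ...
	// Wrap the plan last; this is a no-op when there is no LIMIT or OFFSET.
	return p.limit(count, offset, plan), nil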
65 changes: 44 additions & 21 deletions sql/select.go
@@ -199,12 +199,11 @@ func (p *planner) Select(n *parser.Select, autoCommit bool) (planNode, *roachpb.
 		if pberr != nil {
 			return nil, pberr
 		}
-		var err error
-		plan, err = p.limit(limit, sort.wrap(plan))
+		count, offset, err := p.evalLimit(limit)
 		if err != nil {
 			return nil, roachpb.NewError(err)
 		}
-		return plan, nil
+		return p.limit(count, offset, sort.wrap(plan)), nil
 	}
 }
 
@@ -270,39 +269,52 @@ func (p *planner) initSelect(
 		ordering = sort.Ordering().ordering
 	}
 
+	limitCount, limitOffset, err := p.evalLimit(limit)
+	if err != nil {
+		return nil, roachpb.NewError(err)
+	}
+
 	if scan, ok := s.table.node.(*scanNode); ok {
-		// Find the set of columns that we actually need values for. This is an optimization to avoid
-		// unmarshaling unnecessary values and is also used for index selection.
+		// Find the set of columns that we actually need values for. This is an
+		// optimization to avoid unmarshaling unnecessary values and is also
+		// used for index selection.
 		neededCols := make([]bool, len(s.table.columns))
 		for i := range neededCols {
 			_, ok := s.qvals[columnRef{&s.table, i}]
 			neededCols[i] = ok
 		}
 		scan.setNeededColumns(neededCols)
 
-		// If we are only preparing, the filter expression can contain unexpanded subqueries which
-		// are not supported by splitFilter.
+		// If we are only preparing, the filter expression can contain
+		// unexpanded subqueries which are not supported by splitFilter.
 		if !p.evalCtx.PrepareOnly {
 			// Compute a filter expression for the scan node.
 			convFunc := func(expr parser.VariableExpr) (bool, parser.VariableExpr) {
 				qval := expr.(*qvalue)
 				if qval.colRef.table != &s.table {
-					// TODO(radu): when we will support multiple tables, this will be a valid
-					// case.
+					// TODO(radu): when we will support multiple tables, this
+					// will be a valid case.
 					panic("scan qvalue refers to unknown table")
 				}
 				return true, scan.getQValue(qval.colRef.colIdx)
 			}
 
 			scan.filter, s.filter = splitFilter(s.filter, convFunc)
 			if s.filter != nil {
-				// Right now we support only one table, so the entire expression should be
-				// converted.
+				// Right now we support only one table, so the entire expression
+				// should be converted.
 				panic(fmt.Sprintf("residual filter `%s` (scan filter `%s`)", s.filter, scan.filter))
 			}
 		}
 
-		plan := p.selectIndex(s, scan, ordering, grouping)
+		// If we have a reasonable limit, prefer an order matching index even if
+		// it is not covering - unless we are grouping, in which case the limit
+		// applies to the grouping results and not to the rows we scan.
+		var preferOrderMatchingIndex bool
+		if !grouping && len(ordering) > 0 && limitCount <= 1000-limitOffset {
+			preferOrderMatchingIndex = true
+		}
+		plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex)
 
 		// Update s.table with the new plan.
 		s.table.node = plan
@@ -311,11 +323,7 @@ func (p *planner) initSelect(
 	s.ordering = s.computeOrdering(s.table.node.Ordering())
 
 	// Wrap this node as necessary.
-	limitNode, err := p.limit(limit, p.distinct(parsed, sort.wrap(group.wrap(s))))
-	if err != nil {
-		return nil, roachpb.NewError(err)
-	}
-	return limitNode, nil
+	return p.limit(limitCount, limitOffset, p.distinct(parsed, sort.wrap(group.wrap(s)))), nil
 }
 
 // Initializes the table node, given the parsed select expression
@@ -645,6 +653,8 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 	return ordering
 }
 
+const nonCoveringIndexPenalty = 10
+
 // selectIndex analyzes the scanNode to determine if there is an index
 // available that can fulfill the query with a more restrictive scan.
 //
@@ -656,7 +666,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 // transformed into a set of spans to scan within the index.
 //
 // If grouping is true, the ordering is the desired ordering for grouping.
-func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping,
+	preferOrderMatching bool) planNode {
 	if s.desc.isEmpty() || (s.filter == nil && ordering == nil) {
 		// No table or no where-clause and no ordering.
 		s.initOrdering(0)
@@ -737,7 +751,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder
 
 	if ordering != nil {
 		for _, c := range candidates {
-			c.analyzeOrdering(sel, s, ordering)
+			c.analyzeOrdering(sel, s, ordering, preferOrderMatching)
 		}
 	}
 
@@ -864,7 +878,7 @@ func (v *indexInfo) init(s *scanNode) {
 		v.cost += float64(1 + len(v.desc.Columns) - len(v.desc.PrimaryIndex.ColumnIDs))
 		// Non-covering indexes are significantly more expensive than covering
 		// indexes.
-		v.cost *= 10
+		v.cost *= nonCoveringIndexPenalty
 	}
 }
@@ -891,7 +905,11 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) {
 // analyzeOrdering analyzes the ordering provided by the index and determines
 // if it matches the ordering requested by the query. Non-matching orderings
 // increase the cost of using the index.
-func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering,
+	preferOrderMatching bool) {
 	// Compute the prefix of the index for which we have exact constraints. This
 	// prefix is inconsequential for ordering because the values are identical.
 	v.exactPrefix = exactPrefix(v.constraints)
@@ -918,6 +936,11 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co
 	weight := float64(len(ordering)+1) / float64(match+1)
 	v.cost *= weight
 
+	if match == len(ordering) && preferOrderMatching {
+		// Offset the non-covering index cost penalty.
+		v.cost *= (1.0 / nonCoveringIndexPenalty)
+	}
+
 	if log.V(2) {
 		log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d",
 			v.index.Name, weight, v.reverse, indexOrdering, ordering)
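
To see how the new penalty offset plays out, here is a self-contained sketch of the cost arithmetic; the 100.0 base cost and the single-column ordering are illustrative assumptions, not values from the commit:

	package main

	import "fmt"

	const nonCoveringIndexPenalty = 10

	func main() {
		cost := 100.0                   // assumed base cost of scanning the index
		cost *= nonCoveringIndexPenalty // indexInfo.init: non-covering penalty (x10)

		// analyzeOrdering: a one-column ORDER BY fully matched by the index.
		match, orderLen := 1, 1
		weight := float64(orderLen+1) / float64(match+1)
		cost *= weight // 2/2 = 1.0: no ordering penalty

		// New in this commit: a complete ordering match with preferOrderMatching
		// cancels the non-covering penalty.
		if match == orderLen {
			cost *= 1.0 / nonCoveringIndexPenalty
		}

		fmt.Println(cost) // 100: the index competes as if it were covering
	}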
10 changes: 4 additions & 6 deletions sql/testdata/explain_debug
@@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1
 ----
-0 /abc/primary/1/'one'   NULL PARTIAL
-0 /abc/primary/1/'one'/c 1.1  BUFFERED
-1 /abc/primary/2/'two'   NULL BUFFERED
-2 /abc/primary/3/'three' NULL BUFFERED
-0 0 (2, 'two', NULL)   FILTERED
-1 1 (3, 'three', NULL) ROW
+0 /abc/foo/'two'         /2   PARTIAL
+0 /abc/primary/2/'two'   NULL FILTERED
+1 /abc/foo/'three'       /3   PARTIAL
+1 /abc/primary/3/'three' NULL ROW
 
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2
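
In the ORDER BY ... LIMIT testcase above, the order-matching (non-covering) index is now preferred: rows arrive in order via abc@foo and an index-join fetches the remaining columns from the primary index, instead of buffering every primary-index row and sorting.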
6 changes: 6 additions & 0 deletions sql/testdata/select
@@ -168,6 +168,12 @@ SELECT * FROM xyzw LIMIT a
 query error argument of OFFSET must not contain variables
 SELECT * FROM xyzw OFFSET a
 
+query error negative value for LIMIT
+SELECT * FROM xyzw LIMIT -100
+
+query error negative value for OFFSET
+SELECT * FROM xyzw OFFSET -100
+
 query error unsupported binary operator: <int> \+ <float>
 SELECT * FROM xyzw OFFSET 1 + 0.0
 
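The two new testcases exercise the negative-value check added to evalLimit above.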
39 changes: 39 additions & 0 deletions sql/testdata/select_non_covering_index
@@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8
 0 index-join
 1 scan t@c /1-
 1 scan t@primary
+
+# The following testcases verify that when we have a small limit, we prefer an
+# order-matching index.
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c
+----
+0 sort +c
+1 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5
+----
+0 limit count: 5, offset: 0
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5
+----
+0 limit count: ALL, offset: 5
+1 sort +c
+2 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5
+----
+0 limit count: 5, offset: 5
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000
+----
+0 limit count: 1000000, offset: 0
+1 sort +c (top 1000000)
+2 scan t@primary -
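
The final testcase shows the guard on the preference: with a limit of 1000000, the condition limitCount <= 1000-limitOffset in initSelect does not hold, so the planner keeps the primary-index scan followed by a sort.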
