sql: prefer order-matching index if there is a limit #5446

Merged 2 commits on Mar 22, 2016
2 changes: 1 addition & 1 deletion sql/backfill.go
@@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro
 		desc: *tableDesc,
 	}
 	scan.initDescDefaults()
-	rows := p.selectIndex(&selectNode{}, scan, nil, false)
+	rows := p.selectIndex(&selectNode{}, scan, nil, false, false)
 
 	// Construct a map from column ID to the index the value appears at within a
 	// row.
54 changes: 33 additions & 21 deletions sql/limit.go
@@ -25,60 +25,72 @@ import (
 	"github.com/cockroachdb/cockroach/util"
 )
 
-// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
-func (p *planner) limit(limit *parser.Limit, plan planNode) (planNode, error) {
+// evalLimit evaluates the Count and Offset fields. If Count is missing, the
+// value is MaxInt64. If Offset is missing, the value is 0.
+func (p *planner) evalLimit(limit *parser.Limit) (count, offset int64, err error) {
+	count = math.MaxInt64
+	offset = 0
+
 	if limit == nil {
-		return plan, nil
+		return count, offset, nil
 	}
 
-	var count, offset int64
-
 	data := []struct {
-		name       string
-		src        parser.Expr
-		dst        *int64
-		defaultVal int64
+		name string
+		src  parser.Expr
+		dst  *int64
 	}{
-		{"LIMIT", limit.Count, &count, math.MaxInt64},
-		{"OFFSET", limit.Offset, &offset, 0},
+		{"LIMIT", limit.Count, &count},
+		{"OFFSET", limit.Offset, &offset},
 	}
 
 	for _, datum := range data {
-		if datum.src == nil {
-			*datum.dst = datum.defaultVal
-		} else {
+		if datum.src != nil {
 			if parser.ContainsVars(datum.src) {
-				return nil, util.Errorf("argument of %s must not contain variables", datum.name)
+				return 0, 0, util.Errorf("argument of %s must not contain variables", datum.name)
 			}
 
 			normalized, err := p.parser.NormalizeExpr(p.evalCtx, datum.src)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 			dstDatum, err := normalized.Eval(p.evalCtx)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 
 			if dstDatum == parser.DNull {
-				*datum.dst = datum.defaultVal
+				// Use the default value.
+				continue
 			}
 
 			if dstDInt, ok := dstDatum.(parser.DInt); ok {
-				*datum.dst = int64(dstDInt)
+				val := int64(dstDInt)
+				if val < 0 {
+					return 0, 0, fmt.Errorf("negative value for %s", datum.name)
+				}
+				*datum.dst = val
+				continue
 			}
 
-			return nil, fmt.Errorf("argument of %s must be type %s, not type %s", datum.name, parser.DummyInt.Type(), dstDatum.Type())
+			return 0, 0, fmt.Errorf("argument of %s must be type %s, not type %s",
+				datum.name, parser.DummyInt.Type(), dstDatum.Type())
 		}
 	}
+	return count, offset, nil
+}
+
+// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
+func (p *planner) limit(count, offset int64, plan planNode) planNode {
+	if count == math.MaxInt64 && offset == 0 {
+		return plan
+	}
+
+	if count != math.MaxInt64 {
+		plan.SetLimitHint(offset+count, false /* hard */)
+	}
+
-	return &limitNode{planNode: plan, count: count, offset: offset}, nil
+	return &limitNode{planNode: plan, count: count, offset: offset}
 }
 
 type limitNode struct {
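
Note: the refactor splits the old limit into two steps; evalLimit computes the effective count and offset once, and limit consumes the precomputed values. Below is a minimal standalone sketch of the evaluation rules (a missing LIMIT defaults to math.MaxInt64, a missing OFFSET to 0, negative values are rejected). The planner's real method also normalizes and evaluates constant SQL expressions, so this helper and its signature are illustrative only.

package main

import (
	"fmt"
	"math"
)

// evalLimit mirrors the defaulting and validation rules of the patch,
// but takes pre-evaluated values instead of SQL expressions (hypothetical
// helper, not the planner's code).
func evalLimit(limit, offset *int64) (count, off int64, err error) {
	count, off = math.MaxInt64, 0
	for _, c := range []struct {
		name string
		src  *int64
		dst  *int64
	}{
		{"LIMIT", limit, &count},
		{"OFFSET", offset, &off},
	} {
		if c.src == nil {
			continue // keep the default value
		}
		if *c.src < 0 {
			return 0, 0, fmt.Errorf("negative value for %s", c.name)
		}
		*c.dst = *c.src
	}
	return count, off, nil
}

func main() {
	five := int64(5)
	if count, offset, err := evalLimit(&five, nil); err == nil {
		fmt.Println(count, offset) // 5 0
	}
	neg := int64(-100)
	if _, _, err := evalLimit(nil, &neg); err != nil {
		fmt.Println(err) // negative value for OFFSET
	}
}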
65 changes: 44 additions & 21 deletions sql/select.go
@@ -199,12 +199,11 @@ func (p *planner) Select(n *parser.Select, autoCommit bool) (planNode, *roachpb.
 		if pberr != nil {
 			return nil, pberr
 		}
-		var err error
-		plan, err = p.limit(limit, sort.wrap(plan))
+		count, offset, err := p.evalLimit(limit)
 		if err != nil {
 			return nil, roachpb.NewError(err)
 		}
-		return plan, nil
+		return p.limit(count, offset, sort.wrap(plan)), nil
 	}
 }
 
@@ -270,39 +269,52 @@ func (p *planner) initSelect(
 		ordering = sort.Ordering().ordering
 	}
 
+	limitCount, limitOffset, err := p.evalLimit(limit)
+	if err != nil {
+		return nil, roachpb.NewError(err)
+	}
+
 	if scan, ok := s.table.node.(*scanNode); ok {
-		// Find the set of columns that we actually need values for. This is an optimization to avoid
-		// unmarshaling unnecessary values and is also used for index selection.
+		// Find the set of columns that we actually need values for. This is an
+		// optimization to avoid unmarshaling unnecessary values and is also
+		// used for index selection.
 		neededCols := make([]bool, len(s.table.columns))
 		for i := range neededCols {
 			_, ok := s.qvals[columnRef{&s.table, i}]
 			neededCols[i] = ok
 		}
 		scan.setNeededColumns(neededCols)
 
-		// If we are only preparing, the filter expression can contain unexpanded subqueries which
-		// are not supported by splitFilter.
+		// If we are only preparing, the filter expression can contain
+		// unexpanded subqueries which are not supported by splitFilter.
 		if !p.evalCtx.PrepareOnly {
 			// Compute a filter expression for the scan node.
 			convFunc := func(expr parser.VariableExpr) (bool, parser.VariableExpr) {
 				qval := expr.(*qvalue)
 				if qval.colRef.table != &s.table {
-					// TODO(radu): when we will support multiple tables, this will be a valid
-					// case.
+					// TODO(radu): when we will support multiple tables, this
+					// will be a valid case.
					panic("scan qvalue refers to unknown table")
 				}
 				return true, scan.getQValue(qval.colRef.colIdx)
 			}
 
 			scan.filter, s.filter = splitFilter(s.filter, convFunc)
 			if s.filter != nil {
-				// Right now we support only one table, so the entire expression should be
-				// converted.
+				// Right now we support only one table, so the entire expression
+				// should be converted.
 				panic(fmt.Sprintf("residual filter `%s` (scan filter `%s`)", s.filter, scan.filter))
 			}
 		}
 
-		plan := p.selectIndex(s, scan, ordering, grouping)
+		// If we have a reasonable limit, prefer an order matching index even if
+		// it is not covering - unless we are grouping, in which case the limit
+		// applies to the grouping results and not to the rows we scan.
+		var preferOrderMatchingIndex bool
+		if !grouping && len(ordering) > 0 && limitCount <= 1000-limitOffset {
+			preferOrderMatchingIndex = true
+		}
+		plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex)
 
 		// Update s.table with the new plan.
 		s.table.node = plan
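
Note on the heuristic above: since the limit is applied after the offset, the scan has to produce up to offset+count rows, so the preference only kicks in when that total is at most 1000. A hypothetical standalone helper (preferOrderMatching below is not the patch's code) that mirrors the check:

package main

import (
	"fmt"
	"math"
)

// preferOrderMatching returns true when the query is ordered, not grouped,
// and the scan would stop after at most offset+count rows, with 1000 as the
// cutoff used in the patch.
func preferOrderMatching(grouping bool, orderingLen int, count, offset int64) bool {
	// The bound is written count <= 1000-offset rather than
	// count+offset <= 1000 because count is math.MaxInt64 when the query
	// has no LIMIT, and the addition would overflow.
	return !grouping && orderingLen > 0 && count <= 1000-offset
}

func main() {
	fmt.Println(preferOrderMatching(false, 1, 5, 5))             // true: LIMIT 5 OFFSET 5
	fmt.Println(preferOrderMatching(false, 1, 1000000, 0))       // false: limit too large
	fmt.Println(preferOrderMatching(false, 1, math.MaxInt64, 5)) // false: no LIMIT given
	fmt.Println(preferOrderMatching(true, 1, 5, 0))              // false: limit applies to groups
}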
@@ -311,11 +323,7 @@ func (p *planner) initSelect(
 	s.ordering = s.computeOrdering(s.table.node.Ordering())
 
 	// Wrap this node as necessary.
-	limitNode, err := p.limit(limit, p.distinct(parsed, sort.wrap(group.wrap(s))))
-	if err != nil {
-		return nil, roachpb.NewError(err)
-	}
-	return limitNode, nil
+	return p.limit(limitCount, limitOffset, p.distinct(parsed, sort.wrap(group.wrap(s)))), nil
 }
 
 // Initializes the table node, given the parsed select expression
@@ -645,6 +653,8 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 	return ordering
 }
 
+const nonCoveringIndexPenalty = 10
+
 // selectIndex analyzes the scanNode to determine if there is an index
 // available that can fulfill the query with a more restrictive scan.
 //
@@ -656,7 +666,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 // transformed into a set of spans to scan within the index.
 //
 // If grouping is true, the ordering is the desired ordering for grouping.
-func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping,
+	preferOrderMatching bool) planNode {
 	if s.desc.isEmpty() || (s.filter == nil && ordering == nil) {
 		// No table or no where-clause and no ordering.
 		s.initOrdering(0)
@@ -737,7 +751,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder
 
 	if ordering != nil {
 		for _, c := range candidates {
-			c.analyzeOrdering(sel, s, ordering)
+			c.analyzeOrdering(sel, s, ordering, preferOrderMatching)
 		}
 	}
 
@@ -864,7 +878,7 @@ func (v *indexInfo) init(s *scanNode) {
 			v.cost += float64(1 + len(v.desc.Columns) - len(v.desc.PrimaryIndex.ColumnIDs))
 			// Non-covering indexes are significantly more expensive than covering
 			// indexes.
-			v.cost *= 10
+			v.cost *= nonCoveringIndexPenalty
 		}
 	}
 }
@@ -891,7 +905,11 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) {
 // analyzeOrdering analyzes the ordering provided by the index and determines
 // if it matches the ordering requested by the query. Non-matching orderings
 // increase the cost of using the index.
-func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering,
+	preferOrderMatching bool) {
 	// Compute the prefix of the index for which we have exact constraints. This
 	// prefix is inconsequential for ordering because the values are identical.
 	v.exactPrefix = exactPrefix(v.constraints)
@@ -918,6 +936,11 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co
 	weight := float64(len(ordering)+1) / float64(match+1)
 	v.cost *= weight
 
+	if match == len(ordering) && preferOrderMatching {
+		// Offset the non-covering index cost penalty.
+		v.cost *= (1.0 / nonCoveringIndexPenalty)
+	}
+
 	if log.V(2) {
 		log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d",
 			v.index.Name, weight, v.reverse, indexOrdering, ordering)
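
Note on the cost arithmetic: a non-covering index pays a 10x penalty in indexInfo.init; an index that fully matches the requested ordering gets weight (len(ordering)+1)/(match+1) = 1, and with preferOrderMatching set, the 1/10 factor above cancels the penalty exactly. A small illustrative sketch (indexCost and the base cost of 1 are hypothetical simplifications, not the planner's real cost model):

package main

import "fmt"

const nonCoveringIndexPenalty = 10

// indexCost shows how the two multipliers combine (illustrative numbers
// only; the real cost also reflects key counts and constraint spans).
func indexCost(base float64, covering bool, orderingLen, match int, preferOrderMatching bool) float64 {
	cost := base
	if !covering {
		cost *= nonCoveringIndexPenalty // penalty from indexInfo.init
	}
	cost *= float64(orderingLen+1) / float64(match+1) // ordering weight
	if match == orderingLen && preferOrderMatching {
		cost *= 1.0 / nonCoveringIndexPenalty // cancel the penalty
	}
	return cost
}

func main() {
	// ORDER BY c LIMIT 5: the non-covering index t@c matches the ordering
	// fully (match=1); the covering primary index does not match (match=0).
	fmt.Println(indexCost(1, false, 1, 1, true)) // t@c: 1 * 10 * 1 * 0.1 = 1
	fmt.Println(indexCost(1, true, 1, 0, true))  // t@primary: 1 * 2 = 2
}

This is why the EXPLAIN tests below pick t@c with an index-join for LIMIT 5, but fall back to sorting t@primary for LIMIT 1000000, where the preference flag stays off.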
10 changes: 4 additions & 6 deletions sql/testdata/explain_debug
@@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1
 ----
-0 /abc/primary/1/'one'   NULL PARTIAL
-0 /abc/primary/1/'one'/c 1.1  BUFFERED
-1 /abc/primary/2/'two'   NULL BUFFERED
-2 /abc/primary/3/'three' NULL BUFFERED
-0 0 (2, 'two', NULL)   FILTERED
-1 1 (3, 'three', NULL) ROW
+0 /abc/foo/'two'         /2   PARTIAL
+0 /abc/primary/2/'two'   NULL FILTERED
+1 /abc/foo/'three'       /3   PARTIAL
+1 /abc/primary/3/'three' NULL ROW
 
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2
6 changes: 6 additions & 0 deletions sql/testdata/select
@@ -168,6 +168,12 @@ SELECT * FROM xyzw LIMIT a
 query error argument of OFFSET must not contain variables
 SELECT * FROM xyzw OFFSET a
 
+query error negative value for LIMIT
+SELECT * FROM xyzw LIMIT -100
+
+query error negative value for OFFSET
+SELECT * FROM xyzw OFFSET -100
+
 query error unsupported binary operator: <int> \+ <float>
 SELECT * FROM xyzw OFFSET 1 + 0.0
 
39 changes: 39 additions & 0 deletions sql/testdata/select_non_covering_index
@@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8
 0 index-join
 1 scan t@c /1-
 1 scan t@primary
+
+# The following testcases verify that when we have a small limit, we prefer an
+# order-matching index.
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c
+----
+0 sort +c
+1 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5
+----
+0 limit count: 5, offset: 0
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5
+----
+0 limit count: ALL, offset: 5
+1 sort +c
+2 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5
+----
+0 limit count: 5, offset: 5
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000
+----
+0 limit count: 1000000, offset: 0
+1 sort +c (top 1000000)
+2 scan t@primary -