Skip to content

Commit

Permalink
sql: prefer order-matching index if there is a limit
Browse files Browse the repository at this point in the history
In #4925, we observed ineffective planning for a query in the photos app. We
prefer to use the primary index and sort rather than use a non-covering index,
which makes sense in general (non-covering indices require an expensive
indexJoin), but in this case we also had a limit. In such a case, using the
index would require looking only at the first rows instead of getting all
matching rows and sorting.

In this change we tweak the index selection: if we have a reasonable limit
(limit plus offset of at most 1000 rows) and we are not grouping, we give a
"boost" to all indices that match the ordering exactly. The boost precisely
offsets the non-covering index penalty.

In addition to the new tests, I also verified that the photos app query in
#4925 now uses the index.

Fixes #5246.
  • Loading branch information
RaduBerinde committed Mar 22, 2016
1 parent 16c80a2 commit aa372ba
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 17 deletions.
2 changes: 1 addition & 1 deletion sql/backfill.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro
desc: *tableDesc,
}
scan.initDescDefaults()
rows := p.selectIndex(&selectNode{}, scan, nil, false)
rows := p.selectIndex(&selectNode{}, scan, nil, false, false)

// Construct a map from column ID to the index the value appears at within a
// row.
Expand Down
6 changes: 5 additions & 1 deletion sql/limit.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ func (p *planner) evalLimit(limit *parser.Limit) (count, offset int64, err error
}

if dstDInt, ok := dstDatum.(parser.DInt); ok {
*datum.dst = int64(dstDInt)
val := int64(dstDInt)
if val < 0 {
return 0, 0, fmt.Errorf("negative value for %s", datum.name)
}
*datum.dst = val
continue
}

Expand Down
41 changes: 32 additions & 9 deletions sql/select.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ func (p *planner) initSelect(
ordering = sort.Ordering().ordering
}

limitCount, limitOffset, err := p.evalLimit(limit)
if err != nil {
return nil, roachpb.NewError(err)
}

if scan, ok := s.table.node.(*scanNode); ok {
// Find the set of columns that we actually need values for. This is an
// optimization to avoid unmarshaling unnecessary values and is also
Expand Down Expand Up @@ -302,7 +307,14 @@ func (p *planner) initSelect(
}
}

plan := p.selectIndex(s, scan, ordering, grouping)
// If we have a reasonable limit, prefer an order-matching index even if
// it is not covering - unless we are grouping, in which case the limit
// applies to the grouping results and not to the rows we scan.
var preferOrderMatchingIndex bool
if !grouping && len(ordering) > 0 && limitCount <= 1000-limitOffset {
preferOrderMatchingIndex = true
}
plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex)

// Update s.table with the new plan.
s.table.node = plan
Expand All @@ -311,10 +323,6 @@ func (p *planner) initSelect(
s.ordering = s.computeOrdering(s.table.node.Ordering())

// Wrap this node as necessary.
limitCount, limitOffset, err := p.evalLimit(limit)
if err != nil {
return nil, roachpb.NewError(err)
}
return p.limit(limitCount, limitOffset, p.distinct(parsed, sort.wrap(group.wrap(s)))), nil
}

Expand Down Expand Up @@ -645,6 +653,8 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
return ordering
}

const nonCoveringIndexPenalty = 10

// selectIndex analyzes the scanNode to determine if there is an index
// available that can fulfill the query with a more restrictive scan.
//
Expand All @@ -656,7 +666,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
// transformed into a set of spans to scan within the index.
//
// If grouping is true, the ordering is the desired ordering for grouping.
func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode {
//
// If preferOrderMatching is true, we prefer an index that matches the desired
// ordering completely, even if it is not a covering index.
func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping,
preferOrderMatching bool) planNode {
if s.desc.isEmpty() || (s.filter == nil && ordering == nil) {
// No table or no where-clause and no ordering.
s.initOrdering(0)
Expand Down Expand Up @@ -737,7 +751,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder

if ordering != nil {
for _, c := range candidates {
c.analyzeOrdering(sel, s, ordering)
c.analyzeOrdering(sel, s, ordering, preferOrderMatching)
}
}

Expand Down Expand Up @@ -864,7 +878,7 @@ func (v *indexInfo) init(s *scanNode) {
v.cost += float64(1 + len(v.desc.Columns) - len(v.desc.PrimaryIndex.ColumnIDs))
// Non-covering indexes are significantly more expensive than covering
// indexes.
v.cost *= 10
v.cost *= nonCoveringIndexPenalty
}
}
}
Expand All @@ -891,7 +905,11 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) {
// analyzeOrdering analyzes the ordering provided by the index and determines
// if it matches the ordering requested by the query. Non-matching orderings
// increase the cost of using the index.
func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) {
//
// If preferOrderMatching is true, we prefer an index that matches the desired
// ordering completely, even if it is not a covering index.
func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering,
preferOrderMatching bool) {
// Compute the prefix of the index for which we have exact constraints. This
// prefix is inconsequential for ordering because the values are identical.
v.exactPrefix = exactPrefix(v.constraints)
Expand All @@ -918,6 +936,11 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co
weight := float64(len(ordering)+1) / float64(match+1)
v.cost *= weight

if match == len(ordering) && preferOrderMatching {
// Offset the non-covering index cost penalty.
v.cost *= (1.0 / nonCoveringIndexPenalty)
}

if log.V(2) {
log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d",
v.index.Name, weight, v.reverse, indexOrdering, ordering)
Expand Down
10 changes: 4 additions & 6 deletions sql/testdata/explain_debug
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC
query ITTT
EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1
----
0 /abc/primary/1/'one' NULL PARTIAL
0 /abc/primary/1/'one'/c 1.1 BUFFERED
1 /abc/primary/2/'two' NULL BUFFERED
2 /abc/primary/3/'three' NULL BUFFERED
0 0 (2, 'two', NULL) FILTERED
1 1 (3, 'three', NULL) ROW
0 /abc/foo/'two' /2 PARTIAL
0 /abc/primary/2/'two' NULL FILTERED
1 /abc/foo/'three' /3 PARTIAL
1 /abc/primary/3/'three' NULL ROW

query ITTT
EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2
Expand Down
6 changes: 6 additions & 0 deletions sql/testdata/select
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,12 @@ SELECT * FROM xyzw LIMIT a
query error argument of OFFSET must not contain variables
SELECT * FROM xyzw OFFSET a

query error negative value for LIMIT
SELECT * FROM xyzw LIMIT -100

query error negative value for OFFSET
SELECT * FROM xyzw OFFSET -100

query error unsupported binary operator: <int> \+ <float>
SELECT * FROM xyzw OFFSET 1 + 0.0

Expand Down
39 changes: 39 additions & 0 deletions sql/testdata/select_non_covering_index
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8
0 index-join
1 scan t@c /1-
1 scan t@primary

# The following test cases verify that when we have a small limit, we prefer an
# order-matching index.

query ITT
EXPLAIN SELECT * FROM t ORDER BY c
----
0 sort +c
1 scan t@primary -

query ITT
EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5
----
0 limit count: 5, offset: 0
1 index-join
2 scan t@c -
2 scan t@primary

query ITT
EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5
----
0 limit count: ALL, offset: 5
1 sort +c
2 scan t@primary -

query ITT
EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5
----
0 limit count: 5, offset: 5
1 index-join
2 scan t@c -
2 scan t@primary

query ITT
EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000
----
0 limit count: 1000000, offset: 0
1 sort +c (top 1000000)
2 scan t@primary -

0 comments on commit aa372ba

Please sign in to comment.