sql: prefer order-matching index if there is a limit

In #4925, we observed ineffective planning for a query in the photos app. We prefer to use the primary index and sort rather than use a non-covering index which makes sense in general (non-covering indices require and expensive indexJoin) but in this case we also had a limit. In such a case using the index would require looking only at the first rows instead of getting all matching rows and sorting. In this change we tweak the index selection: if we have a reasonable limit, we give a "boost" to all indices that match the ordering exactly. The boost exactly offsets the non-covering index penalty. In addition to the new tests, I also verified the photo app query in #4925 now uses the index. Fixes #5246.
cockroachdb · Mar 21, 2016 · 56b9366 · 56b9366
1 parent 16c80a2
commit 56b9366
Show file tree

Hide file tree

Showing 4 changed files with 67 additions and 12 deletions.
diff --git a/sql/backfill.go b/sql/backfill.go
@@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro
 			desc:    *tableDesc,
 		}
 		scan.initDescDefaults()
-		rows := p.selectIndex(&selectNode{}, scan, nil, false)
+		rows := p.selectIndex(&selectNode{}, scan, nil, false, false)
 
 		// Construct a map from column ID to the index the value appears at within a
 		// row.

diff --git a/sql/select.go b/sql/select.go
@@ -19,6 +19,7 @@ package sql
 import (
 	"bytes"
 	"fmt"
+	"math"
 	"reflect"
 	"sort"
 
@@ -269,6 +270,8 @@ func (p *planner) initSelect(
 		ordering = sort.Ordering().ordering
 	}
 
+	limitCount, limitOffset, err := p.evalLimit(limit)
+
 	if scan, ok := s.table.node.(*scanNode); ok {
 		// Find the set of columns that we actually need values for. This is an
 		// optimization to avoid unmarshaling unnecessary values and is also
@@ -302,7 +305,14 @@ func (p *planner) initSelect(
 			}
 		}
 
-		plan := p.selectIndex(s, scan, ordering, grouping)
+		// If we are not grouping and we have a reasonable limit, prefer an order
+		// matching index even if it is not covering.
+		var preferOrderMatchingIndex bool
+		if !grouping && len(ordering) > 0 && limitCount != math.MaxInt64 &&
+			limitCount+limitOffset <= 1000 {
+			preferOrderMatchingIndex = true
+		}
+		plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex)
 
 		// Update s.table with the new plan.
 		s.table.node = plan
@@ -311,7 +321,6 @@ func (p *planner) initSelect(
 	s.ordering = s.computeOrdering(s.table.node.Ordering())
 
 	// Wrap this node as necessary.
-	limitCount, limitOffset, err := p.evalLimit(limit)
 	if err != nil {
 		return nil, roachpb.NewError(err)
 	}
@@ -656,7 +665,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 // transformed into a set of spans to scan within the index.
 //
 // If grouping is true, the ordering is the desired ordering for grouping.
-func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping,
+	preferOrderMatching bool) planNode {
 	if s.desc.isEmpty() || (s.filter == nil && ordering == nil) {
 		// No table or no where-clause and no ordering.
 		s.initOrdering(0)
@@ -737,7 +750,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder
 
 	if ordering != nil {
 		for _, c := range candidates {
-			c.analyzeOrdering(sel, s, ordering)
+			c.analyzeOrdering(sel, s, ordering, preferOrderMatching)
 		}
 	}
 
@@ -891,7 +904,8 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) {
 // analyzeOrdering analyzes the ordering provided by the index and determines
 // if it matches the ordering requested by the query. Non-matching orderings
 // increase the cost of using the index.
-func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) {
+func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering,
+	preferOrderMatching bool) {
 	// Compute the prefix of the index for which we have exact constraints. This
 	// prefix is inconsequential for ordering because the values are identical.
 	v.exactPrefix = exactPrefix(v.constraints)
@@ -918,6 +932,10 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co
 	weight := float64(len(ordering)+1) / float64(match+1)
 	v.cost *= weight
 
+	if match == len(ordering) && preferOrderMatching {
+		v.cost *= 0.1
+	}
+
 	if log.V(2) {
 		log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d",
 			v.index.Name, weight, v.reverse, indexOrdering, ordering)

diff --git a/sql/testdata/explain_debug b/sql/testdata/explain_debug
@@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1
 ----
-0  /abc/primary/1/'one'    NULL               PARTIAL
-0  /abc/primary/1/'one'/c  1.1                BUFFERED
-1  /abc/primary/2/'two'    NULL               BUFFERED
-2  /abc/primary/3/'three'  NULL               BUFFERED
-0  0                       (2, 'two', NULL)   FILTERED
-1  1                       (3, 'three', NULL) ROW
+0  /abc/foo/'two'          /2    PARTIAL   
+0  /abc/primary/2/'two'    NULL  FILTERED  
+1  /abc/foo/'three'        /3    PARTIAL   
+1  /abc/primary/3/'three'  NULL  ROW       
 
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2

diff --git a/sql/testdata/select_non_covering_index b/sql/testdata/select_non_covering_index
@@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8
 0 index-join
 1 scan       t@c /1-
 1 scan       t@primary
+
+# The following testcases verify that when we have a small limit, we prefer an
+# order-matching index.
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c
+----
+0 sort       +c
+1 scan       t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5
+----
+0 limit      count: 5, offset: 0
+1 index-join
+2 scan       t@c -
+2 scan       t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5
+----
+0 limit      count: ALL, offset: 5
+1 sort       +c
+2 scan       t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5
+----
+0 limit      count: 5, offset: 5
+1 index-join
+2 scan       t@c -
+2 scan       t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000
+----
+0 limit      count: 1000000, offset: 0
+1 sort       +c (top 1000000)
+2 scan       t@primary -