From 9ece68131d035e23d9ed59cccdcdfda52ef54372 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 21 Mar 2016 14:45:10 -0400 Subject: [PATCH] sql: prefer order-matching index if there is a limit In #4925, we observed ineffective planning for a query in the photos app. We prefer to use the primary index and sort rather than use a non-covering index, which makes sense in general (non-covering indices require an expensive indexJoin), but in this case we also had a limit. In such a case, using the index would require looking only at the first rows instead of getting all matching rows and sorting. In this change we tweak the index selection: if we have a reasonable limit, we give a "boost" to all indices that match the ordering exactly. The boost exactly offsets the non-covering index penalty. In addition to the new tests, I also verified the photos app query in #4925 now uses the index. Fixes #5246. --- sql/backfill.go | 2 +- sql/limit.go | 6 +++- sql/select.go | 41 ++++++++++++++++++++------ sql/testdata/explain_debug | 10 +++---- sql/testdata/select_non_covering_index | 39 ++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 17 deletions(-) diff --git a/sql/backfill.go b/sql/backfill.go index 838d1636d6e6..bc948d3ea5f2 100644 --- a/sql/backfill.go +++ b/sql/backfill.go @@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro desc: *tableDesc, } scan.initDescDefaults() - rows := p.selectIndex(&selectNode{}, scan, nil, false) + rows := p.selectIndex(&selectNode{}, scan, nil, false, false) // Construct a map from column ID to the index the value appears at within a // row. 
diff --git a/sql/limit.go b/sql/limit.go index 18ca117dcf5a..4e6281a5c6b2 100644 --- a/sql/limit.go +++ b/sql/limit.go @@ -65,7 +65,11 @@ func (p *planner) evalLimit(limit *parser.Limit) (count, offset int64, err error } if dstDInt, ok := dstDatum.(parser.DInt); ok { - *datum.dst = int64(dstDInt) + val := int64(dstDInt) + if val < 0 { + return 0, 0, fmt.Errorf("negative value for %s", datum.name) + } + *datum.dst = val continue } diff --git a/sql/select.go b/sql/select.go index 9742257448ad..2d2e03c16530 100644 --- a/sql/select.go +++ b/sql/select.go @@ -269,6 +269,11 @@ func (p *planner) initSelect( ordering = sort.Ordering().ordering } + limitCount, limitOffset, err := p.evalLimit(limit) + if err != nil { + return nil, roachpb.NewError(err) + } + if scan, ok := s.table.node.(*scanNode); ok { // Find the set of columns that we actually need values for. This is an // optimization to avoid unmarshaling unnecessary values and is also @@ -302,7 +307,14 @@ func (p *planner) initSelect( } } - plan := p.selectIndex(s, scan, ordering, grouping) + // If we have a reasonable limit, prefer an order matching index even if + // it is not covering - unless we are grouping, in which case the limit + // applies to the grouping results and not to the rows we scan. + var preferOrderMatchingIndex bool + if !grouping && len(ordering) > 0 && limitCount <= 1000-limitOffset { + preferOrderMatchingIndex = true + } + plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex) // Update s.table with the new plan. s.table.node = plan @@ -311,10 +323,6 @@ func (p *planner) initSelect( s.ordering = s.computeOrdering(s.table.node.Ordering()) // Wrap this node as necessary. 
- limitCount, limitOffset, err := p.evalLimit(limit) - if err != nil { - return nil, roachpb.NewError(err) - } return p.limit(limitCount, limitOffset, p.distinct(parsed, sort.wrap(group.wrap(s)))), nil } @@ -645,6 +653,8 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo { return ordering } +const nonCoveringIndexPenalty = 10 + // selectIndex analyzes the scanNode to determine if there is an index // available that can fulfill the query with a more restrictive scan. // @@ -656,7 +666,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo { // transformed into a set of spans to scan within the index. // // If grouping is true, the ordering is the desired ordering for grouping. -func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode { +// +// If preferOrderMatching is true, we prefer an index that matches the desired +// ordering completely, even if it is not a covering index. +func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping, + preferOrderMatching bool) planNode { if s.desc.isEmpty() || (s.filter == nil && ordering == nil) { // No table or no where-clause and no ordering. s.initOrdering(0) @@ -737,7 +751,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder if ordering != nil { for _, c := range candidates { - c.analyzeOrdering(sel, s, ordering) + c.analyzeOrdering(sel, s, ordering, preferOrderMatching) } } @@ -864,7 +878,7 @@ func (v *indexInfo) init(s *scanNode) { v.cost += float64(1 + len(v.desc.Columns) - len(v.desc.PrimaryIndex.ColumnIDs)) // Non-covering indexes are significantly more expensive than covering // indexes. - v.cost *= 10 + v.cost *= nonCoveringIndexPenalty } } } @@ -891,7 +905,11 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) { // analyzeOrdering analyzes the ordering provided by the index and determines // if it matches the ordering requested by the query. 
Non-matching orderings // increase the cost of using the index. -func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) { +// +// If preferOrderMatching is true, we prefer an index that matches the desired +// ordering completely, even if it is not a covering index. +func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering, + preferOrderMatching bool) { // Compute the prefix of the index for which we have exact constraints. This // prefix is inconsequential for ordering because the values are identical. v.exactPrefix = exactPrefix(v.constraints) @@ -918,6 +936,11 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co weight := float64(len(ordering)+1) / float64(match+1) v.cost *= weight + if match == len(ordering) && preferOrderMatching { + // Offset the non-covering index cost penalty. + v.cost *= (1.0 / nonCoveringIndexPenalty) + } + if log.V(2) { log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d", v.index.Name, weight, v.reverse, indexOrdering, ordering) diff --git a/sql/testdata/explain_debug b/sql/testdata/explain_debug index db817be3d1e5..da2876f102c9 100644 --- a/sql/testdata/explain_debug +++ b/sql/testdata/explain_debug @@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC query ITTT EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1 ---- -0 /abc/primary/1/'one' NULL PARTIAL -0 /abc/primary/1/'one'/c 1.1 BUFFERED -1 /abc/primary/2/'two' NULL BUFFERED -2 /abc/primary/3/'three' NULL BUFFERED -0 0 (2, 'two', NULL) FILTERED -1 1 (3, 'three', NULL) ROW +0 /abc/foo/'two' /2 PARTIAL +0 /abc/primary/2/'two' NULL FILTERED +1 /abc/foo/'three' /3 PARTIAL +1 /abc/primary/3/'three' NULL ROW query ITTT EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2 diff --git a/sql/testdata/select_non_covering_index b/sql/testdata/select_non_covering_index index 86dbb577199b..2d2c1569b227 100644 --- 
a/sql/testdata/select_non_covering_index +++ b/sql/testdata/select_non_covering_index @@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8 0 index-join 1 scan t@c /1- 1 scan t@primary + +# The following testcases verify that when we have a small limit, we prefer an +# order-matching index. + +query ITT +EXPLAIN SELECT * FROM t ORDER BY c +---- +0 sort +c +1 scan t@primary - + +query ITT +EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 +---- +0 limit count: 5, offset: 0 +1 index-join +2 scan t@c - +2 scan t@primary + +query ITT +EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5 +---- +0 limit count: ALL, offset: 5 +1 sort +c +2 scan t@primary - + +query ITT +EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5 +---- +0 limit count: 5, offset: 5 +1 index-join +2 scan t@c - +2 scan t@primary + +query ITT +EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000 +---- +0 limit count: 1000000, offset: 0 +1 sort +c (top 1000000) +2 scan t@primary -