diff --git a/sql/backfill.go b/sql/backfill.go
index 838d1636d6e6..bc948d3ea5f2 100644
--- a/sql/backfill.go
+++ b/sql/backfill.go
@@ -166,7 +166,7 @@ func (p *planner) backfillBatch(b *client.Batch, tableDesc *TableDescriptor) *ro
 		desc: *tableDesc,
 	}
 	scan.initDescDefaults()
-	rows := p.selectIndex(&selectNode{}, scan, nil, false)
+	rows := p.selectIndex(&selectNode{}, scan, nil, false, false)
 
 	// Construct a map from column ID to the index the value appears at within a
 	// row.
diff --git a/sql/limit.go b/sql/limit.go
index 61412c85e8d9..4e6281a5c6b2 100644
--- a/sql/limit.go
+++ b/sql/limit.go
@@ -25,60 +25,72 @@ import (
 	"github.com/cockroachdb/cockroach/util"
 )
 
-// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
-func (p *planner) limit(limit *parser.Limit, plan planNode) (planNode, error) {
+// evalLimit evaluates the Count and Offset fields. If Count is missing, the
+// value is MaxInt64. If Offset is missing, the value is 0.
+func (p *planner) evalLimit(limit *parser.Limit) (count, offset int64, err error) {
+	count = math.MaxInt64
+	offset = 0
+
 	if limit == nil {
-		return plan, nil
+		return count, offset, nil
 	}
 
-	var count, offset int64
-
 	data := []struct {
-		name       string
-		src        parser.Expr
-		dst        *int64
-		defaultVal int64
+		name string
+		src  parser.Expr
+		dst  *int64
 	}{
-		{"LIMIT", limit.Count, &count, math.MaxInt64},
-		{"OFFSET", limit.Offset, &offset, 0},
+		{"LIMIT", limit.Count, &count},
+		{"OFFSET", limit.Offset, &offset},
 	}
 
 	for _, datum := range data {
-		if datum.src == nil {
-			*datum.dst = datum.defaultVal
-		} else {
+		if datum.src != nil {
 			if parser.ContainsVars(datum.src) {
-				return nil, util.Errorf("argument of %s must not contain variables", datum.name)
+				return 0, 0, util.Errorf("argument of %s must not contain variables", datum.name)
 			}
 			normalized, err := p.parser.NormalizeExpr(p.evalCtx, datum.src)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 			dstDatum, err := normalized.Eval(p.evalCtx)
 			if err != nil {
-				return nil, err
+				return 0, 0, err
 			}
 			if dstDatum == parser.DNull {
-				*datum.dst = datum.defaultVal
+				// Use the default value.
 				continue
 			}
 			if dstDInt, ok := dstDatum.(parser.DInt); ok {
-				*datum.dst = int64(dstDInt)
+				val := int64(dstDInt)
+				if val < 0 {
+					return 0, 0, fmt.Errorf("negative value for %s", datum.name)
+				}
+				*datum.dst = val
 				continue
 			}
-			return nil, fmt.Errorf("argument of %s must be type %s, not type %s", datum.name, parser.DummyInt.Type(), dstDatum.Type())
+			return 0, 0, fmt.Errorf("argument of %s must be type %s, not type %s",
+				datum.name, parser.DummyInt.Type(), dstDatum.Type())
 		}
 	}
+	return count, offset, nil
+}
+
+// limit constructs a limitNode based on the LIMIT and OFFSET clauses.
+func (p *planner) limit(count, offset int64, plan planNode) planNode {
+	if count == math.MaxInt64 && offset == 0 {
+		return plan
+	}
 	if count != math.MaxInt64 {
 		plan.SetLimitHint(offset+count, false /* hard */)
 	}
-	return &limitNode{planNode: plan, count: count, offset: offset}, nil
+	return &limitNode{planNode: plan, count: count, offset: offset}
 }
 
 type limitNode struct {
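Aside: a minimal standalone sketch of the semantics the new evalLimit/limit pair implements (defaults of MaxInt64/0, rejection of negative values, and the "no limitNode needed" early-out). This is illustrative only, not CockroachDB code: evalClause and limitNeeded are hypothetical helpers, and the clause expression is modeled as an already-evaluated *int64 instead of a parser.Expr.

package main

import (
	"fmt"
	"math"
)

// evalClause mirrors one iteration of evalLimit's loop: nil plays the role
// of a missing clause (or a NULL result) and keeps the default, while a
// negative value is rejected.
func evalClause(name string, expr *int64, def int64) (int64, error) {
	if expr == nil {
		return def, nil // keep the default: MaxInt64 for LIMIT, 0 for OFFSET
	}
	if *expr < 0 {
		return 0, fmt.Errorf("negative value for %s", name)
	}
	return *expr, nil
}

// limitNeeded mirrors the early-out in the new planner.limit: with no LIMIT
// (count == MaxInt64) and no OFFSET, no limitNode has to be planned at all.
func limitNeeded(count, offset int64) bool {
	return count != math.MaxInt64 || offset != 0
}

func main() {
	five, neg := int64(5), int64(-100)

	count, _ := evalClause("LIMIT", &five, math.MaxInt64)
	offset, _ := evalClause("OFFSET", nil, 0)
	fmt.Println(count, offset, limitNeeded(count, offset)) // 5 0 true

	if _, err := evalClause("LIMIT", &neg, math.MaxInt64); err != nil {
		fmt.Println(err) // negative value for LIMIT
	}
}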
diff --git a/sql/select.go b/sql/select.go
index 6a019cec4184..2d2e03c16530 100644
--- a/sql/select.go
+++ b/sql/select.go
@@ -199,12 +199,11 @@ func (p *planner) Select(n *parser.Select, autoCommit bool) (planNode, *roachpb.
 		if pberr != nil {
 			return nil, pberr
 		}
-		var err error
-		plan, err = p.limit(limit, sort.wrap(plan))
+		count, offset, err := p.evalLimit(limit)
 		if err != nil {
 			return nil, roachpb.NewError(err)
 		}
-		return plan, nil
+		return p.limit(count, offset, sort.wrap(plan)), nil
 	}
 }
 
@@ -270,9 +269,15 @@ func (p *planner) initSelect(
 		ordering = sort.Ordering().ordering
 	}
 
+	limitCount, limitOffset, err := p.evalLimit(limit)
+	if err != nil {
+		return nil, roachpb.NewError(err)
+	}
+
 	if scan, ok := s.table.node.(*scanNode); ok {
-		// Find the set of columns that we actually need values for. This is an optimization to avoid
-		// unmarshaling unnecessary values and is also used for index selection.
+		// Find the set of columns that we actually need values for. This is an
+		// optimization to avoid unmarshaling unnecessary values and is also
+		// used for index selection.
 		neededCols := make([]bool, len(s.table.columns))
 		for i := range neededCols {
 			_, ok := s.qvals[columnRef{&s.table, i}]
@@ -280,15 +285,15 @@ func (p *planner) initSelect(
 		}
 		scan.setNeededColumns(neededCols)
 
-		// If we are only preparing, the filter expression can contain unexpanded subqueries which
-		// are not supported by splitFilter.
+		// If we are only preparing, the filter expression can contain
+		// unexpanded subqueries which are not supported by splitFilter.
 		if !p.evalCtx.PrepareOnly {
 			// Compute a filter expression for the scan node.
 			convFunc := func(expr parser.VariableExpr) (bool, parser.VariableExpr) {
 				qval := expr.(*qvalue)
 				if qval.colRef.table != &s.table {
-					// TODO(radu): when we will support multiple tables, this will be a valid
-					// case.
+					// TODO(radu): when we will support multiple tables, this
+					// will be a valid case.
 					panic("scan qvalue refers to unknown table")
 				}
 				return true, scan.getQValue(qval.colRef.colIdx)
@@ -296,13 +301,20 @@ func (p *planner) initSelect(
 			scan.filter, s.filter = splitFilter(s.filter, convFunc)
 			if s.filter != nil {
-				// Right now we support only one table, so the entire expression should be
-				// converted.
+				// Right now we support only one table, so the entire expression
+				// should be converted.
 				panic(fmt.Sprintf("residual filter `%s` (scan filter `%s`)", s.filter, scan.filter))
 			}
 		}
 
-		plan := p.selectIndex(s, scan, ordering, grouping)
+		// If we have a reasonable limit, prefer an order-matching index even if
+		// it is not covering - unless we are grouping, in which case the limit
+		// applies to the grouping results and not to the rows we scan.
+		var preferOrderMatchingIndex bool
+		if !grouping && len(ordering) > 0 && limitCount <= 1000-limitOffset {
+			preferOrderMatchingIndex = true
+		}
+		plan := p.selectIndex(s, scan, ordering, grouping, preferOrderMatchingIndex)
 
 		// Update s.table with the new plan.
 		s.table.node = plan
@@ -311,11 +323,7 @@ func (p *planner) initSelect(
 	s.ordering = s.computeOrdering(s.table.node.Ordering())
 
 	// Wrap this node as necessary.
-	limitNode, err := p.limit(limit, p.distinct(parsed, sort.wrap(group.wrap(s))))
-	if err != nil {
-		return nil, roachpb.NewError(err)
-	}
-	return limitNode, nil
+	return p.limit(limitCount, limitOffset, p.distinct(parsed, sort.wrap(group.wrap(s)))), nil
 }
 
 // Initializes the table node, given the parsed select expression
@@ -645,6 +653,8 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 	return ordering
 }
 
+const nonCoveringIndexPenalty = 10
+
 // selectIndex analyzes the scanNode to determine if there is an index
 // available that can fulfill the query with a more restrictive scan.
 //
@@ -656,7 +666,11 @@ func (s *selectNode) computeOrdering(fromOrder orderingInfo) orderingInfo {
 // transformed into a set of spans to scan within the index.
 //
 // If grouping is true, the ordering is the desired ordering for grouping.
-func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping bool) planNode {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrdering, grouping,
+	preferOrderMatching bool) planNode {
 	if s.desc.isEmpty() || (s.filter == nil && ordering == nil) {
 		// No table or no where-clause and no ordering.
 		s.initOrdering(0)
@@ -737,7 +751,7 @@ func (p *planner) selectIndex(sel *selectNode, s *scanNode, ordering columnOrder
 
 	if ordering != nil {
 		for _, c := range candidates {
-			c.analyzeOrdering(sel, s, ordering)
+			c.analyzeOrdering(sel, s, ordering, preferOrderMatching)
 		}
 	}
 
@@ -864,7 +878,7 @@ func (v *indexInfo) init(s *scanNode) {
 			v.cost += float64(1 + len(v.desc.Columns) - len(v.desc.PrimaryIndex.ColumnIDs))
 			// Non-covering indexes are significantly more expensive than covering
 			// indexes.
-			v.cost *= 10
+			v.cost *= nonCoveringIndexPenalty
 		}
 	}
 }
@@ -891,7 +905,11 @@ func (v *indexInfo) analyzeExprs(exprs []parser.Exprs) {
 // analyzeOrdering analyzes the ordering provided by the index and determines
 // if it matches the ordering requested by the query. Non-matching orderings
 // increase the cost of using the index.
-func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering) {
+//
+// If preferOrderMatching is true, we prefer an index that matches the desired
+// ordering completely, even if it is not a covering index.
+func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering columnOrdering,
+	preferOrderMatching bool) {
 	// Compute the prefix of the index for which we have exact constraints. This
 	// prefix is inconsequential for ordering because the values are identical.
 	v.exactPrefix = exactPrefix(v.constraints)
@@ -918,6 +936,11 @@ func (v *indexInfo) analyzeOrdering(sel *selectNode, scan *scanNode, ordering co
 	weight := float64(len(ordering)+1) / float64(match+1)
 	v.cost *= weight
 
+	if match == len(ordering) && preferOrderMatching {
+		// Offset the non-covering index cost penalty.
+		v.cost *= (1.0 / nonCoveringIndexPenalty)
+	}
+
 	if log.V(2) {
 		log.Infof("%s: analyzeOrdering: weight=%0.2f reverse=%v index=%d requested=%d",
 			v.index.Name, weight, v.reverse, indexOrdering, ordering)
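Aside: to see how the preference introduced in initSelect and analyzeOrdering plays out numerically, here is a minimal standalone sketch of the cost adjustments. indexChoice and both helpers are illustrative stand-ins for the planner's indexInfo logic, with the ordering match precomputed rather than derived from index constraints.

package main

import (
	"fmt"
	"math"
)

// nonCoveringIndexPenalty matches the constant introduced in sql/select.go.
const nonCoveringIndexPenalty = 10

// indexChoice is a simplified stand-in for the planner's indexInfo, keeping
// only what the cost heuristic needs.
type indexChoice struct {
	name     string
	covering bool
	cost     float64
}

// analyzeOrdering applies the same shape of adjustments as the patch:
// penalize non-covering indexes, weight up poor ordering matches, and offset
// the non-covering penalty on a full match when preferOrderMatching is set.
func (c *indexChoice) analyzeOrdering(orderingLen, match int, preferOrderMatching bool) {
	if !c.covering {
		c.cost *= nonCoveringIndexPenalty
	}
	weight := float64(orderingLen+1) / float64(match+1)
	c.cost *= weight
	if match == orderingLen && preferOrderMatching {
		c.cost *= 1.0 / nonCoveringIndexPenalty
	}
}

// preferOrderMatching mirrors the condition added in initSelect: with a small
// limit and no grouping, reading a few rows through an order-matching index
// beats sorting the whole table.
func preferOrderMatching(grouping bool, orderingLen int, limitCount, limitOffset int64) bool {
	return !grouping && orderingLen > 0 && limitCount <= 1000-limitOffset
}

func main() {
	// SELECT * FROM t ORDER BY c LIMIT 5: the small limit turns the
	// preference on; without a LIMIT, evalLimit returns math.MaxInt64 and
	// the condition stays false.
	prefer := preferOrderMatching(false, 1, 5, 0)                // true
	fmt.Println(preferOrderMatching(false, 1, math.MaxInt64, 0)) // false

	primary := indexChoice{name: "t@primary", covering: true, cost: 1}
	ordered := indexChoice{name: "t@c", covering: false, cost: 1}
	primary.analyzeOrdering(1, 0, prefer) // no match: weight 2 -> cost 2
	ordered.analyzeOrdering(1, 1, prefer) // full match: 10 * 1 * 0.1 -> cost 1

	fmt.Printf("%s=%.2f %s=%.2f\n", primary.name, primary.cost, ordered.name, ordered.cost)
}

With the preference off, t@primary (cost 2) beats the non-covering t@c (cost 10) and the plan sorts; with it on, t@c wins and the sort disappears, which is exactly what the testdata changes below verify.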
diff --git a/sql/testdata/explain_debug b/sql/testdata/explain_debug
index db817be3d1e5..da2876f102c9 100644
--- a/sql/testdata/explain_debug
+++ b/sql/testdata/explain_debug
@@ -68,12 +68,10 @@ EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc ORDER BY b DESC LIMIT 1 OFFSET 1
 ----
-0 /abc/primary/1/'one'   NULL PARTIAL
-0 /abc/primary/1/'one'/c 1.1  BUFFERED
-1 /abc/primary/2/'two'   NULL BUFFERED
-2 /abc/primary/3/'three' NULL BUFFERED
-0 0 (2, 'two', NULL)   FILTERED
-1 1 (3, 'three', NULL) ROW
+0 /abc/foo/'two'         /2   PARTIAL
+0 /abc/primary/2/'two'   NULL FILTERED
+1 /abc/foo/'three'       /3   PARTIAL
+1 /abc/primary/3/'three' NULL ROW
 
 query ITTT
 EXPLAIN (DEBUG) SELECT * FROM abc WHERE a = 2
diff --git a/sql/testdata/select b/sql/testdata/select
index 14fc41baee49..80c6c757499a 100644
--- a/sql/testdata/select
+++ b/sql/testdata/select
@@ -168,6 +168,12 @@ SELECT * FROM xyzw LIMIT a
 query error argument of OFFSET must not contain variables
 SELECT * FROM xyzw OFFSET a
 
+query error negative value for LIMIT
+SELECT * FROM xyzw LIMIT -100
+
+query error negative value for OFFSET
+SELECT * FROM xyzw OFFSET -100
+
 query error unsupported binary operator: \+
 SELECT * FROM xyzw OFFSET 1 + 0.0
diff --git a/sql/testdata/select_non_covering_index b/sql/testdata/select_non_covering_index
index 86dbb577199b..2d2c1569b227 100644
--- a/sql/testdata/select_non_covering_index
+++ b/sql/testdata/select_non_covering_index
@@ -84,3 +84,42 @@ EXPLAIN SELECT * FROM t WHERE c > 0 AND d = 8
 0 index-join
 1 scan t@c /1-
 1 scan t@primary
+
+# The following testcases verify that when we have a small limit, we prefer an
+# order-matching index.
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c
+----
+0 sort +c
+1 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5
+----
+0 limit count: 5, offset: 0
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c OFFSET 5
+----
+0 limit count: ALL, offset: 5
+1 sort +c
+2 scan t@primary -
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 5 OFFSET 5
+----
+0 limit count: 5, offset: 5
+1 index-join
+2 scan t@c -
+2 scan t@primary
+
+query ITT
+EXPLAIN SELECT * FROM t ORDER BY c LIMIT 1000000
+----
+0 limit count: 1000000, offset: 0
+1 sort +c (top 1000000)
+2 scan t@primary -