diff --git a/pkg/sql/opt/exec/execbuilder/testdata/topk b/pkg/sql/opt/exec/execbuilder/testdata/topk index 679d26419049..a47091eb37da 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/topk +++ b/pkg/sql/opt/exec/execbuilder/testdata/topk @@ -130,12 +130,14 @@ vectorized: true │ └── • index join │ columns: (k, v, w) + │ ordering: +v │ estimated row count: 1,000 (missing stats) │ table: t@t_pkey │ key columns: k │ └── • scan columns: (k, v) + ordering: +v estimated row count: 1,000 (missing stats) table: t@v spans: FULL SCAN diff --git a/pkg/sql/opt/memo/testdata/memo b/pkg/sql/opt/memo/testdata/memo index 017b9250f0df..5ff965dbae97 100644 --- a/pkg/sql/opt/memo/testdata/memo +++ b/pkg/sql/opt/memo/testdata/memo @@ -136,7 +136,7 @@ memo (optimized, ~20KB, required=[presentation: y:2,x:5,c:10] [ordering: +2]) │ └── [] │ ├── best: (project G2 G3 y x) │ └── cost: 1754.72 - ├── G2: (limit G4 G5 ordering=+2) (top-k G4 &{10 +2}) + ├── G2: (limit G4 G5 ordering=+2) (top-k G4 &{10 +2 }) │ ├── [ordering: +2] │ │ ├── best: (limit G4="[ordering: +2] [limit hint: 10.00]" G5 ordering=+2) │ │ └── cost: 1754.51 diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-geo b/pkg/sql/opt/memo/testdata/stats/inverted-geo index a7aeeb0a3142..583f5a160f80 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-geo +++ b/pkg/sql/opt/memo/testdata/stats/inverted-geo @@ -96,7 +96,7 @@ memo (optimized, ~11KB, required=[presentation: i:1]) │ └── [presentation: i:1] │ ├── best: (project G2 G3 i) │ └── cost: 2652.94 - ├── G2: (limit G4 G5 ordering=+1) (top-k G4 &{1 +1}) + ├── G2: (limit G4 G5 ordering=+1) (top-k G4 &{1 +1 }) │ └── [] │ ├── best: (limit G4="[ordering: +1] [limit hint: 1.00]" G5 ordering=+1) │ └── cost: 2652.92 @@ -200,7 +200,7 @@ memo (optimized, ~11KB, required=[presentation: i:1]) │ └── [presentation: i:1] │ ├── best: (project G2 G3 i) │ └── cost: 14.15 - ├── G2: (limit G4 G5 ordering=+1) (top-k G4 &{1 +1}) + ├── G2: (limit G4 G5 ordering=+1) (top-k G4 &{1 +1 }) │ └── [] │ ├── best: (limit G4="[ordering: +1] [limit hint: 1.00]" G5 ordering=+1) │ └── cost: 14.13 diff --git a/pkg/sql/opt/ops/relational.opt b/pkg/sql/opt/ops/relational.opt index 671310688083..630aa855f3ce 100644 --- a/pkg/sql/opt/ops/relational.opt +++ b/pkg/sql/opt/ops/relational.opt @@ -1042,17 +1042,20 @@ define Offset { Ordering OrderingChoice } -# TopK returns the top K, where K is a constant, rows from the input set according to its -# sort ordering, discarding the remaining rows. The Limit is a constant -# positive integer; the operator returns at most Limit rows. Rows can be sorted by one -# or more of the input columns, each of which can be sorted in either ascending -# or descending order. See the Ordering field in the PhysicalProps struct. +# TopK returns the top K, where K is a constant, rows from the input set +# according to its sort ordering, discarding the remaining rows. The Limit is a +# constant positive integer; the operator returns at most Limit rows. Rows can +# be sorted by one or more of the input columns, each of which can be sorted in +# either ascending or descending order. See the Ordering field in the +# PhysicalProps struct. # # Unlike the Limit relational operator, TopK does not require its input to be -# ordered. TopK can be used to substitute a Limit that requires its input to be -# ordered and performs best when the input is not already ordered. TopK scans the -# input, storing the K rows that best meet the ordering requirement in a max -# heap, then sorts the K rows. +# ordered. However, if the input is known to have a partial ordering of the +# required ordering, TopK can take advantage of optimizations. TopK can be used +# to substitute a Limit that requires its input to be ordered and performs best +# when the input is not already fully ordered. TopK scans the input, storing the +# K rows that best meet the ordering requirement in a max heap, then sorts the K +# rows. [Relational] define TopK { Input RelExpr @@ -1062,7 +1065,14 @@ define TopK { [Private] define TopKPrivate { K int64 + + # Ordering is the required order in which the K rows should be sorted when output. Ordering OrderingChoice + + # PartialOrdering is an optional ordering imposed on the input that is + # a partial order of Ordering and allows TopK to take advantage of partial + # ordering optimizations. + PartialOrdering OrderingChoice } # Max1Row enforces that its input must return at most one row. If the input diff --git a/pkg/sql/opt/ordering/ordering.go b/pkg/sql/opt/ordering/ordering.go index eafe8c817895..8296db950624 100644 --- a/pkg/sql/opt/ordering/ordering.go +++ b/pkg/sql/opt/ordering/ordering.go @@ -176,7 +176,7 @@ func init() { } funcMap[opt.TopKOp] = funcs{ canProvideOrdering: topKCanProvideOrdering, - buildChildReqOrdering: noChildReqOrdering, + buildChildReqOrdering: topKBuildChildReqOrdering, buildProvidedOrdering: topKBuildProvided, } funcMap[opt.ScalarGroupByOp] = funcs{ diff --git a/pkg/sql/opt/ordering/topk.go b/pkg/sql/opt/ordering/topk.go index 2714bb4c02eb..9ef01fdde99e 100644 --- a/pkg/sql/opt/ordering/topk.go +++ b/pkg/sql/opt/ordering/topk.go @@ -26,3 +26,12 @@ func topKBuildProvided(expr memo.RelExpr, required *props.OrderingChoice) opt.Or // TopK orders its own input, so the ordering it provides is its own. return trimProvided(expr.(*memo.TopKExpr).Ordering.ToOrdering(), required, &expr.Relational().FuncDeps) } + +func topKBuildChildReqOrdering( + parent memo.RelExpr, required *props.OrderingChoice, childIdx int, +) props.OrderingChoice { + // If Top K has an input ordering to impose on its child for partial order + // optimizations, then require the child to have that ordering. + topK := parent.(*memo.TopKExpr) + return topK.PartialOrdering +} diff --git a/pkg/sql/opt/xform/coster.go b/pkg/sql/opt/xform/coster.go index 1ee039bf3fe2..dc6a4922c547 100644 --- a/pkg/sql/opt/xform/coster.go +++ b/pkg/sql/opt/xform/coster.go @@ -570,9 +570,14 @@ func (c *coster) ComputeCost(candidate memo.RelExpr, required *physical.Required func (c *coster) computeTopKCost(topk *memo.TopKExpr, required *physical.Required) memo.Cost { rel := topk.Relational() - inputRowCount := topk.Input.Relational().Stats.RowCount outputRowCount := rel.Stats.RowCount + inputRowCount := topk.Input.Relational().Stats.RowCount + if !required.Ordering.Any() { + // When there is a partial ordering of the input rows' sort columns, we may + // be able to reduce the number of input rows needed to find the top K rows. + inputRowCount = topKInputLimitHint(c.mem, topk, inputRowCount, outputRowCount, float64(topk.K)) + } // Add the cost of sorting. // Start with a cost of storing each row; TopK sort only stores K rows in a // max heap. @@ -1256,17 +1261,21 @@ func (c *coster) computeProjectSetCost(projectSet *memo.ProjectSetExpr) memo.Cos return cost } -// countSegments calculates the number of segments that will be used to execute -// the sort. If no input ordering is provided, there's only one segment. -func (c *coster) countSegments(sort *memo.SortExpr) float64 { - if sort.InputOrdering.Any() { - return 1 +// getOrderingColStats returns the column statistic for the columns in the +// OrderingChoice oc. The OrderingChoice should be a member of expr. We include +// the Memo as an argument so that functions that call this function can be used +// both inside and outside the coster. +func getOrderingColStats( + mem *memo.Memo, expr memo.RelExpr, oc props.OrderingChoice, +) *props.ColumnStatistic { + if oc.Any() { + return nil } - stats := sort.Relational().Stats - orderedCols := sort.InputOrdering.ColSet() + stats := expr.Relational().Stats + orderedCols := oc.ColSet() orderedStats, ok := stats.ColStats.Lookup(orderedCols) if !ok { - orderedStats, ok = c.mem.RequestColStat(sort, orderedCols) + orderedStats, ok = mem.RequestColStat(expr, orderedCols) if !ok { // I don't think we can ever get here. Since we don't allow the memo // to be optimized twice, the coster should never be used after @@ -1274,7 +1283,16 @@ func (c *coster) countSegments(sort *memo.SortExpr) float64 { panic(errors.AssertionFailedf("could not request the stats for ColSet %v", orderedCols)) } } + return orderedStats +} +// countSegments calculates the number of segments that will be used to execute +// the sort. If no input ordering is provided, there's only one segment. +func (c *coster) countSegments(sort *memo.SortExpr) float64 { + orderedStats := getOrderingColStats(c.mem, sort, sort.InputOrdering) + if orderedStats == nil { + return 1 + } return orderedStats.DistinctCount } @@ -1572,6 +1590,36 @@ func lookupJoinInputLimitHint(inputRowCount, outputRowCount, outputLimitHint flo return math.Min(inputRowCount, expectedLookupCount) } +// topKInputLimitHint calculates an appropriate limit hint for the input +// to a Top K expression when the input is partially sorted. +func topKInputLimitHint( + mem *memo.Memo, topk *memo.TopKExpr, inputRowCount, outputRowCount, K float64, +) float64 { + if outputRowCount == 0 { + return 0 + } + orderedStats := getOrderingColStats(mem, topk, topk.PartialOrdering) + if orderedStats == nil { + return inputRowCount + } + + // In order to find the top K rows of a partially sorted input, we estimate + // the number of rows we'll need to ingest by rounding up the nearest multiple + // of the number of rows per distinct values to K. For example, let's say we + // have 2000 input rows, 100 distinct values, and a K of 10. If we assume that + // each distinct value is found in the same number of input rows, each + // distinct value has 2000/100 = 20 rowsPerDistinctVal. Processing the rows + // for one distinct value is sufficient to find the top K 10 rows. If K were + // 50 instead, we would need to process more distinct values to find the top + // K, so we need to multiply the rowsPerDistinctVal by the minimum number of + // distinct values to process, which we can find by dividing K by the rows per + // distinct values and rounding up, or ceil(50/20) = 3. So if K is 50, we need + // to process approximately 3 * 20 = 60 rows to find the top 50 rows. + rowsPerDistinctVal := inputRowCount / orderedStats.DistinctCount + expectedRows := math.Ceil(K/rowsPerDistinctVal) * rowsPerDistinctVal + return math.Min(inputRowCount, expectedRows) +} + // lookupExprCost accounts for the extra CPU cost of the lookupExpr. func lookupExprCost(join memo.RelExpr) memo.Cost { lookupExpr, ok := join.(*memo.LookupJoinExpr) diff --git a/pkg/sql/opt/xform/groupby_funcs.go b/pkg/sql/opt/xform/groupby_funcs.go index 429379de6150..4076e590b0ff 100644 --- a/pkg/sql/opt/xform/groupby_funcs.go +++ b/pkg/sql/opt/xform/groupby_funcs.go @@ -186,41 +186,11 @@ func (c *CustomFuncs) GenerateStreamingGroupBy( orders := ordering.DeriveInterestingOrderings(input) intraOrd := private.Ordering for _, ord := range orders { - o := ord.ToOrdering() - // We are looking for a prefix of o that satisfies the intra-group ordering - // if we ignore grouping columns. - oIdx, intraIdx := 0, 0 - for ; oIdx < len(o); oIdx++ { - oCol := o[oIdx].ID() - if private.GroupingCols.Contains(oCol) || intraOrd.Optional.Contains(oCol) { - // Grouping or optional column. - continue - } - - if intraIdx < len(intraOrd.Columns) && - intraOrd.Group(intraIdx).Contains(oCol) && - intraOrd.Columns[intraIdx].Descending == o[oIdx].Descending() { - // Column matches the one in the ordering. - intraIdx++ - continue - } - break - } - if oIdx == 0 || intraIdx < len(intraOrd.Columns) { - // No match. + newOrd, fullPrefix, found := getPrefixFromOrdering(ord.ToOrdering(), intraOrd, input, + func(id opt.ColumnID) bool { return private.GroupingCols.Contains(id) }) + if !found || !fullPrefix { continue } - o = o[:oIdx] - - var newOrd props.OrderingChoice - newOrd.FromOrderingWithOptCols(o, opt.ColSet{}) - - // Simplify the ordering according to the input's FDs. Note that this is not - // necessary for correctness because buildChildPhysicalProps would do it - // anyway, but doing it here once can make things more efficient (and we may - // generate fewer expressions if some of these orderings turn out to be - // equivalent). - newOrd.Simplify(&input.Relational().FuncDeps) newPrivate := *private newPrivate.Ordering = newOrd @@ -420,6 +390,8 @@ func (c *CustomFuncs) GenerateLimitedGroupByScans( // Iterate over all non-inverted and non-partial secondary indexes. var pkCols opt.ColSet var iter scanIndexIter + var sb indexScanBuilder + sb.Init(c, sp.Table) iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, sp, nil /* filters */, rejectPrimaryIndex|rejectInvertedIndexes) iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool, constProj memo.ProjectionsExpr) { // The iterator only produces pseudo-partial indexes (the predicate is @@ -463,13 +435,11 @@ func (c *CustomFuncs) GenerateLimitedGroupByScans( // If the index is not covering, scan the needed index columns plus // primary key columns. newScanPrivate.Cols.UnionWith(pkCols) - input := c.e.f.ConstructScan(&newScanPrivate) + sb.SetScan(&newScanPrivate) // Construct an IndexJoin operator that provides the columns missing from // the index. - input = c.e.f.ConstructIndexJoin(input, &memo.IndexJoinPrivate{ - Table: sp.Table, - Cols: sp.Cols, - }) + sb.AddIndexJoin(sp.Cols) + input := sb.BuildNewExpr() // Reconstruct the GroupBy and Limit so the new expression in the memo is // equivalent. input = c.e.f.ConstructGroupBy(input, aggs, gp) diff --git a/pkg/sql/opt/xform/index_scan_builder.go b/pkg/sql/opt/xform/index_scan_builder.go index d23a1633bff8..2920d3442d74 100644 --- a/pkg/sql/opt/xform/index_scan_builder.go +++ b/pkg/sql/opt/xform/index_scan_builder.go @@ -175,6 +175,61 @@ func (b *indexScanBuilder) AddIndexJoin(cols opt.ColSet) { } } +// BuildNewExpr constructs the final expression by composing together the various +// expressions that were specified by previous calls to various add methods. +// It is similar to Build, but does not add the expression to the memo group. +// The output expression must be used as input to another memo expression, as +// the output expression is already interned. +// TODO(harding): Refactor with Build to avoid code duplication. +func (b *indexScanBuilder) BuildNewExpr() (output memo.RelExpr) { + // 1. Only scan. + output = b.f.ConstructScan(&b.scanPrivate) + if !b.hasConstProjections() && !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() { + return + } + + // 2. Wrap input in a Project if constant projections were added. + if b.hasConstProjections() { + output = b.f.ConstructProject(output, b.constProjections, b.scanPrivate.Cols) + if !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() { + return + } + } + + // 3. Wrap input in inner filter if it was added. + if b.hasInnerFilters() { + output = b.f.ConstructSelect(output, b.innerFilters) + if !b.hasInvertedFilter() && !b.hasIndexJoin() { + return + } + } + + // 4. Wrap input in inverted filter if it was added. + if b.hasInvertedFilter() { + output = b.f.ConstructInvertedFilter(output, &b.invertedFilterPrivate) + if !b.hasIndexJoin() { + return + } + } + + // 5. Wrap input in index join if it was added. + if b.hasIndexJoin() { + output = b.f.ConstructIndexJoin(output, &b.indexJoinPrivate) + if !b.hasOuterFilters() { + return + } + } + + // 6. Wrap input in outer filter (which must exist at this point). + if !b.hasOuterFilters() { + // indexJoinDef == 0: outerFilters == 0 handled by #1-4 above. + // indexJoinDef != 0: outerFilters == 0 handled by #5 above. + panic(errors.AssertionFailedf("outer filter cannot be 0 at this point")) + } + output = b.f.ConstructSelect(output, b.outerFilters) + return +} + // Build constructs the final memo expression by composing together the various // expressions that were specified by previous calls to various add methods. func (b *indexScanBuilder) Build(grp memo.RelExpr) { diff --git a/pkg/sql/opt/xform/limit_funcs.go b/pkg/sql/opt/xform/limit_funcs.go index 01ed5e63c9b5..9321c1677626 100644 --- a/pkg/sql/opt/xform/limit_funcs.go +++ b/pkg/sql/opt/xform/limit_funcs.go @@ -220,3 +220,148 @@ func (c *CustomFuncs) MakeTopKPrivate( Ordering: ordering, } } + +// GenerateLimitedTopKScans enumerates all non-inverted secondary indexes on the +// given Scan operator's table and generates an alternate Scan operator for +// each index that includes a partial set of needed columns specified in the +// ScanOpDef. An IndexJoin is constructed to add missing columns. A TopK is also +// constructed to make an equivalent expression for the memo. +// +// For cases where the Scan's secondary index covers all needed columns, see +// GenerateIndexScans, which does not construct an IndexJoin. +func (c *CustomFuncs) GenerateLimitedTopKScans( + grp memo.RelExpr, sp *memo.ScanPrivate, tp *memo.TopKPrivate, +) { + required := tp.Ordering + // Iterate over all non-inverted and non-partial secondary indexes. + var pkCols opt.ColSet + var iter scanIndexIter + var sb indexScanBuilder + sb.Init(c, sp.Table) + iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, sp, nil /* filters */, rejectPrimaryIndex|rejectInvertedIndexes) + iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool, constProj memo.ProjectionsExpr) { + // The iterator only produces pseudo-partial indexes (the predicate is + // true) because no filters are passed to iter.Init to imply a partial + // index predicate. constProj is a projection of constant values based + // on a partial index predicate. It should always be empty because a + // pseudo-partial index cannot hold a column constant. If it is not, we + // panic to avoid performing a logically incorrect transformation. + if len(constProj) != 0 { + panic(errors.AssertionFailedf("expected constProj to be empty")) + } + + // If the secondary index includes the set of needed columns, then this + // case does not need a limited top K and will be covered in + // GenerateIndexScans. + if isCovering { + return + } + + // Calculate the PK columns once. + if pkCols.Empty() { + pkCols = c.PrimaryKeyCols(sp.Table) + } + + // If the first index column and ordering column are not the same, then + // there is no benefit to exploring this index. + if col := sp.Table.ColumnID(index.Column(0).Ordinal()); !required.Columns[0].Group.Contains(col) { + return + } + + // If the index doesn't contain any of the required order columns, then + // there is no benefit to exploring this index. + if !required.Any() && !required.Group(0).Intersects(indexCols) { + return + } + // Scan whatever columns we need which are available from the index. + newScanPrivate := *sp + newScanPrivate.Index = index.Ordinal() + newScanPrivate.Cols = indexCols.Intersection(sp.Cols) + // If the index is not covering, scan the needed index columns plus + // primary key columns. + newScanPrivate.Cols.UnionWith(pkCols) + sb.SetScan(&newScanPrivate) + // Construct an IndexJoin operator that provides the columns missing from + // the index. + sb.AddIndexJoin(sp.Cols) + input := sb.BuildNewExpr() + // Use the overlapping indexes and required ordering. + newPrivate := *tp + grp.Memo().AddTopKToGroup(&memo.TopKExpr{Input: input, TopKPrivate: newPrivate}, grp) + }) +} + +// getPrefixFromOrdering returns an OrderingChoice that holds the prefix +// of Ordering o that satisfies part of the required OrderingChoice intraOrd, +// a bool indicating whether the entire Ordering o was satisfied, and a bool +// indicating whether a prefix of any kind was found. +// isOptional is a function that allows the caller to impose additional +// constraints on columns that are considered optional, and should return true +// if the column is optional. +func getPrefixFromOrdering( + o opt.Ordering, + intraOrd props.OrderingChoice, + input memo.RelExpr, + isOptional func(id opt.ColumnID) bool, +) (newOrd props.OrderingChoice, isFullPrefix bool, found bool) { + // We are looking for a prefix of o that satisfies part of the required ordering + oIdx, intraIdx := 0, 0 + for ; oIdx < len(o); oIdx++ { + oCol := o[oIdx].ID() + if intraOrd.Optional.Contains(oCol) || isOptional(oCol) { + // Optional column. + continue + } + + if intraIdx < len(intraOrd.Columns) && + intraOrd.Group(intraIdx).Contains(oCol) && + intraOrd.Columns[intraIdx].Descending == o[oIdx].Descending() { + // Column matches the one in the ordering. + intraIdx++ + continue + } + break + } + isFullPrefix = intraIdx == len(intraOrd.Columns) + if oIdx == 0 { + // No match. + return newOrd, isFullPrefix, false + } + o = o[:oIdx] + + newOrd.FromOrderingWithOptCols(o, opt.ColSet{}) + + // Simplify the ordering according to the input's FDs. Note that this is not + // necessary for correctness because buildChildPhysicalProps would do it + // anyway, but doing it here once can make things more efficient (and we may + // generate fewer expressions if some of these orderings turn out to be + // equivalent). + newOrd.Simplify(&input.Relational().FuncDeps) + return newOrd, isFullPrefix, true +} + +// GeneratePartialOrderTopK generates TopK expressions with more specific orderings +// based on the interesting orderings property. This enables the optimizer to +// explore TopK with partially ordered input columns. +func (c *CustomFuncs) GeneratePartialOrderTopK( + grp memo.RelExpr, input memo.RelExpr, private *memo.TopKPrivate, +) { + orders := ordering.DeriveInterestingOrderings(input) + intraOrd := private.Ordering + for _, ord := range orders { + newOrd, fullPrefix, found := getPrefixFromOrdering(ord.ToOrdering(), intraOrd, input, func(id opt.ColumnID) bool { + return false + }) + // We don't need to generate a new expression if no prefix was found or the + // prefix encompasses the entire ordering, since that would be a full, not + // partial, order. + if !found || fullPrefix { + continue + } + + newPrivate := *private + newPrivate.PartialOrdering = newOrd + + grp.Memo().AddTopKToGroup(&memo.TopKExpr{Input: input, TopKPrivate: newPrivate}, grp) + } +} diff --git a/pkg/sql/opt/xform/physical_props.go b/pkg/sql/opt/xform/physical_props.go index c0fe5a092511..6afaab5722e2 100644 --- a/pkg/sql/opt/xform/physical_props.go +++ b/pkg/sql/opt/xform/physical_props.go @@ -166,6 +166,24 @@ func BuildChildPhysicalProps( case opt.OrdinalityOp, opt.ProjectOp, opt.ProjectSetOp: childProps.LimitHint = parentProps.LimitHint + + case opt.TopKOp: + if parentProps.Ordering.Any() { + break + } + outputRows := parent.Relational().Stats.RowCount + topk := parent.(*memo.TopKExpr) + k := float64(topk.K) + if outputRows == 0 || outputRows < k { + break + } + if input, ok := parent.Child(nth).(memo.RelExpr); ok { + inputRows := input.Relational().Stats.RowCount + + if limitHint := topKInputLimitHint(mem, topk, inputRows, outputRows, k); limitHint < inputRows { + childProps.LimitHint = limitHint + } + } } if childProps.LimitHint < 0 { diff --git a/pkg/sql/opt/xform/rules/limit.opt b/pkg/sql/opt/xform/rules/limit.opt index 4e04dd595d44..f62977077bce 100644 --- a/pkg/sql/opt/xform/rules/limit.opt +++ b/pkg/sql/opt/xform/rules/limit.opt @@ -124,3 +124,33 @@ ) => (TopK $input (MakeTopKPrivate $limit $ordering)) + +# GenerateLimitedTopKScans generates a set of Scan alternatives for +# each matching index on the scanned table, and an IndexJoin to supply columns +# missing from the index. This differs from GenerateIndexScans, which does not +# generate index joins for non-covering indexes. +# +# This rule is useful when there is an index that provides an ordering that +# partially satisfies TopK's ordering. This lets us explore expressions that +# allow TopK to process a potentially limited number of rows due to its input +# being already partially ordered, and allows TopK to pass on a limit hint to +# its input expressions so that they may not have to process all input rows, +# either. +[GenerateLimitedTopKScans, Explore] +(TopK + (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate)) + $topKPrivate:* +) +=> +(GenerateLimitedTopKScans $scanPrivate $topKPrivate) + +# GeneratePartialOrderTopK generates Top K expressions with a partial input +# ordering using the interesting ordering property. This is useful to explore +# expressions that allow TopK to potentially process fewer rows, which it can +# do if the input is already partially ordered. It also allows TopK to pass on +# a limit hint to its input expressions so that they may not have to process +# their entire input, either. +[GeneratePartialOrderTopK, Explore] +(TopK $input:* $private:*) +=> +(GeneratePartialOrderTopK $input $private) diff --git a/pkg/sql/opt/xform/testdata/coster/limit b/pkg/sql/opt/xform/testdata/coster/limit index dc4ed4877eb4..32596a581ee2 100644 --- a/pkg/sql/opt/xform/testdata/coster/limit +++ b/pkg/sql/opt/xform/testdata/coster/limit @@ -29,7 +29,7 @@ offset ├── index-join a │ ├── columns: x:1!null y:2!null z:3 s:4!null │ ├── cardinality: [0 - 1020] - │ ├── stats: [rows=10] + │ ├── stats: [rows=10, distinct(4)=9.5617925, null(4)=0] │ ├── cost: 85.9729294 │ ├── key: (1) │ ├── fd: ()-->(2), (1)-->(3,4) @@ -38,7 +38,7 @@ offset │ ├── columns: x:1!null y:2!null s:4!null │ ├── internal-ordering: +4,-1 opt(2) │ ├── cardinality: [0 - 1020] - │ ├── stats: [rows=10] + │ ├── stats: [rows=10, distinct(4)=9.5617925, null(4)=0] │ ├── cost: 25.3629294 │ ├── key: (1) │ ├── fd: ()-->(2), (1)-->(4) @@ -61,3 +61,48 @@ offset │ │ └── ordering: +4 opt(2) [actual: +4] │ └── 1020 └── 1000 + +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["y"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1000 + } +]' +---- + +# The limit hint from the TopK leads to a lower cost for the index join and +# index scan. +opt +SELECT * FROM a +ORDER BY y, z +LIMIT 10 +---- +top-k + ├── columns: x:1!null y:2 z:3 s:4!null + ├── internal-ordering: +2,+3 + ├── k: 10 + ├── cardinality: [0 - 10] + ├── stats: [rows=10, distinct(2)=9.95511979, null(2)=0] + ├── cost: 7836.39841 + ├── key: (1) + ├── fd: (1)-->(2-4) + ├── ordering: +2,+3 + └── index-join a + ├── columns: x:1!null y:2 z:3 s:4!null + ├── stats: [rows=10000] + ├── cost: 7744.81875 + ├── key: (1) + ├── fd: (1)-->(2-4) + ├── ordering: +2 + ├── limit hint: 1004.51 + └── scan a@a_y_s_idx + ├── columns: x:1!null y:2 s:4!null + ├── stats: [rows=10000] + ├── cost: 1078.79875 + ├── key: (1) + ├── fd: (1)-->(2,4) + ├── ordering: +2 + └── limit hint: 1004.51 diff --git a/pkg/sql/opt/xform/testdata/coster/topk b/pkg/sql/opt/xform/testdata/coster/topk index 4a63abf48921..1f21dda670e5 100644 --- a/pkg/sql/opt/xform/testdata/coster/topk +++ b/pkg/sql/opt/xform/testdata/coster/topk @@ -97,6 +97,48 @@ scalar-group-by └── min [as=min:7, outer=(6)] └── column6:6 +exec-ddl +ALTER TABLE a INJECT STATISTICS '[ + { + "columns": ["j"], + "created_at": "2019-02-08 04:10:40.001179+00:00", + "row_count": 10000, + "distinct_count": 1000 + } +]' +---- + +# A partially ordered top-k has the lowest cost and propagates limit hints. +opt +SELECT * FROM a ORDER BY j, i LIMIT 10 +---- +top-k + ├── columns: k:1!null i:2 j:3 + ├── internal-ordering: +3,+2 + ├── k: 10 + ├── cardinality: [0 - 10] + ├── stats: [rows=10, distinct(3)=9.95511979, null(3)=0] + ├── cost: 7805.20825 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── ordering: +3,+2 + └── index-join a + ├── columns: k:1!null i:2 j:3 + ├── stats: [rows=10000] + ├── cost: 7713.72858 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── ordering: +3 + ├── limit hint: 1004.51 + └── scan a@j + ├── columns: k:1!null j:3 + ├── stats: [rows=10000] + ├── cost: 1058.70858 + ├── key: (1) + ├── fd: (1)-->(3) + ├── ordering: +3 + └── limit hint: 1004.51 + # Inject stats corresponding to running: # INSERT INTO a VALUES (1, 1, 1); # diff --git a/pkg/sql/opt/xform/testdata/rules/groupby b/pkg/sql/opt/xform/testdata/rules/groupby index ea040625bd47..5836e029ab61 100644 --- a/pkg/sql/opt/xform/testdata/rules/groupby +++ b/pkg/sql/opt/xform/testdata/rules/groupby @@ -462,7 +462,7 @@ memo (optimized, ~5KB, required=[presentation: min:7]) │ ├── best: (scan abc,cols=(1)) │ └── cost: 1074.52 ├── G3: (aggregations G6) - ├── G4: (limit G2 G7 ordering=+1) (scan abc,cols=(1),lim=1) (top-k G2 &{1 +1}) + ├── G4: (limit G2 G7 ordering=+1) (scan abc,cols=(1),lim=1) (top-k G2 &{1 +1 }) │ └── [] │ ├── best: (scan abc,cols=(1),lim=1) │ └── cost: 5.06 @@ -488,9 +488,9 @@ memo (optimized, ~6KB, required=[presentation: min:7]) │ ├── best: (scan abc,cols=(2)) │ └── cost: 1074.52 ├── G3: (aggregations G6) - ├── G4: (limit G7 G8 ordering=+2) (top-k G7 &{1 +2}) + ├── G4: (limit G7 G8 ordering=+2) (top-k G7 &{1 +2 }) │ └── [] - │ ├── best: (top-k G7 &{1 +2}) + │ ├── best: (top-k G7 &{1 +2 }) │ └── cost: 1104.37 ├── G5: (aggregations G9) ├── G6: (min G10) @@ -524,7 +524,7 @@ memo (optimized, ~5KB, required=[presentation: max:7]) │ ├── best: (scan abc,cols=(1)) │ └── cost: 1074.52 ├── G3: (aggregations G6) - ├── G4: (limit G2 G7 ordering=-1) (scan abc,rev,cols=(1),lim=1(rev)) (top-k G2 &{1 -1}) + ├── G4: (limit G2 G7 ordering=-1) (scan abc,rev,cols=(1),lim=1(rev)) (top-k G2 &{1 -1 }) │ └── [] │ ├── best: (scan abc,rev,cols=(1),lim=1(rev)) │ └── cost: 5.06 @@ -550,9 +550,9 @@ memo (optimized, ~6KB, required=[presentation: max:7]) │ ├── best: (scan abc,cols=(2)) │ └── cost: 1074.52 ├── G3: (aggregations G6) - ├── G4: (limit G7 G8 ordering=-2) (top-k G7 &{1 -2}) + ├── G4: (limit G7 G8 ordering=-2) (top-k G7 &{1 -2 }) │ └── [] - │ ├── best: (top-k G7 &{1 -2}) + │ ├── best: (top-k G7 &{1 -2 }) │ └── cost: 1104.37 ├── G5: (aggregations G9) ├── G6: (max G10) @@ -663,9 +663,9 @@ memo (optimized, ~6KB, required=[presentation: max:7]) │ ├── best: (scan abc,cols=(2)) │ └── cost: 1074.52 ├── G3: (aggregations G6) - ├── G4: (limit G7 G8 ordering=-2) (top-k G7 &{1 -2}) + ├── G4: (limit G7 G8 ordering=-2) (top-k G7 &{1 -2 }) │ └── [] - │ ├── best: (top-k G7 &{1 -2}) + │ ├── best: (top-k G7 &{1 -2 }) │ └── cost: 1104.37 ├── G5: (aggregations G9) ├── G6: (max G10) @@ -3447,12 +3447,12 @@ memo SELECT d, e, count(*) FROM defg GROUP BY d, e ORDER BY count(*) LIMIT 10 ---- memo (optimized, ~5KB, required=[presentation: d:1,e:2,count:8] [ordering: +8]) - ├── G1: (limit G2 G3 ordering=+8) (top-k G2 &{10 +8}) + ├── G1: (limit G2 G3 ordering=+8) (top-k G2 &{10 +8 }) │ ├── [presentation: d:1,e:2,count:8] [ordering: +8] - │ │ ├── best: (top-k G2 &{10 +8}) + │ │ ├── best: (top-k G2 &{10 +8 }) │ │ └── cost: 1231.64 │ └── [] - │ ├── best: (top-k G2 &{10 +8}) + │ ├── best: (top-k G2 &{10 +8 }) │ └── cost: 1231.64 ├── G2: (group-by G4 G5 cols=(1,2)) (group-by G4 G5 cols=(1,2),ordering=+1) │ ├── [ordering: +8] [limit hint: 10.00] diff --git a/pkg/sql/opt/xform/testdata/rules/limit b/pkg/sql/opt/xform/testdata/rules/limit index 2c31f58200eb..f404fae83d0a 100644 --- a/pkg/sql/opt/xform/testdata/rules/limit +++ b/pkg/sql/opt/xform/testdata/rules/limit @@ -745,6 +745,9 @@ GenerateTopK (higher cost) + │ └── fd: (1)-->(2) + └── filters + └── (a:1 >= 20) AND (a:1 <= 30) [outer=(1), constraints=(/1: [/20 - /30]; tight)] +-------------------------------------------------------------------------------- +GeneratePartialOrderTopK (no changes) +-------------------------------------------------------------------------------- ================================================================================ Final best expression Cost: 3571.71 @@ -1657,8 +1660,8 @@ top-k memo expect=GenerateTopK SELECT * FROM a ORDER BY k LIMIT 1 ---- -memo (optimized, ~3KB, required=[presentation: k:1,i:2,f:3,s:4,j:5]) - ├── G1: (limit G2 G3 ordering=+1) (scan a,cols=(1-5),lim=1) (top-k G2 &{1 +1}) +memo (optimized, ~4KB, required=[presentation: k:1,i:2,f:3,s:4,j:5]) + ├── G1: (limit G2 G3 ordering=+1) (scan a,cols=(1-5),lim=1) (top-k G2 &{1 +1 }) │ └── [presentation: k:1,i:2,f:3,s:4,j:5] │ ├── best: (scan a,cols=(1-5),lim=1) │ └── cost: 5.11 @@ -1785,3 +1788,308 @@ offset │ ├── key: (1) │ └── fd: (1)-->(2-5) └── 3 + +# --------------------------------------------------- +# GenerateLimitedTopKScans +# --------------------------------------------------- + +exec-ddl +CREATE TABLE defg ( +d INT, +e INT, +f INT, +g INT, +INDEX dd (d), +INDEX dfg (d, f, g), +INDEX df (d, f) +) +---- + +# Generates an index scan on dd, dfg, and df and an index join to get all +# columns, though these are not the best cost plans. +memo expect=GenerateLimitedTopKScans +SELECT d, e FROM defg ORDER BY d, e LIMIT 10 +---- +memo (optimized, ~12KB, required=[presentation: d:1,e:2] [ordering: +1,+2]) + ├── G1: (limit G2 G3 ordering=+1,+2) (top-k G2 &{10 +1,+2 }) (top-k G4 &{10 +1,+2 }) (top-k G5 &{10 +1,+2 }) (top-k G6 &{10 +1,+2 }) (top-k G2 &{10 +1,+2 +1}) (top-k G4 &{10 +1,+2 +1}) (top-k G5 &{10 +1,+2 +1}) (top-k G6 &{10 +1,+2 +1}) + │ ├── [presentation: d:1,e:2] [ordering: +1,+2] + │ │ ├── best: (top-k G2 &{10 +1,+2 }) + │ │ └── cost: 1185.69 + │ └── [] + │ ├── best: (top-k G2 &{10 +1,+2 }) + │ └── cost: 1185.69 + ├── G2: (scan defg,cols=(1,2)) + │ ├── [ordering: +1,+2] [limit hint: 10.00] + │ │ ├── best: (sort G2) + │ │ └── cost: 1345.17 + │ ├── [ordering: +1] + │ │ ├── best: (sort G2) + │ │ └── cost: 1334.20 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (sort G2) + │ │ └── cost: 1334.20 + │ └── [] + │ ├── best: (scan defg,cols=(1,2)) + │ └── cost: 1094.72 + ├── G3: (const 10) + ├── G4: (index-join G7 defg,cols=(1,2)) + │ ├── [ordering: +1] + │ │ ├── best: (index-join G7="[ordering: +1]" defg,cols=(1,2)) + │ │ └── cost: 7134.44 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (index-join G7="[ordering: +1] [limit hint: 104.58]" defg,cols=(1,2)) + │ │ └── cost: 1336.81 + │ └── [] + │ ├── best: (index-join G7 defg,cols=(1,2)) + │ └── cost: 7134.44 + ├── G5: (index-join G8 defg,cols=(1,2)) + │ ├── [ordering: +1] + │ │ ├── best: (index-join G8="[ordering: +1]" defg,cols=(1,2)) + │ │ └── cost: 7154.64 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (index-join G8="[ordering: +1] [limit hint: 104.58]" defg,cols=(1,2)) + │ │ └── cost: 1338.90 + │ └── [] + │ ├── best: (index-join G8 defg,cols=(1,2)) + │ └── cost: 7154.64 + ├── G6: (index-join G9 defg,cols=(1,2)) + │ ├── [ordering: +1] + │ │ ├── best: (index-join G9="[ordering: +1]" defg,cols=(1,2)) + │ │ └── cost: 7144.54 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (index-join G9="[ordering: +1] [limit hint: 104.58]" defg,cols=(1,2)) + │ │ └── cost: 1337.85 + │ └── [] + │ ├── best: (index-join G9 defg,cols=(1,2)) + │ └── cost: 7144.54 + ├── G7: (scan defg@dd,cols=(1,5)) + │ ├── [ordering: +1] + │ │ ├── best: (scan defg@dd,cols=(1,5)) + │ │ └── cost: 1064.42 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (scan defg@dd,cols=(1,5)) + │ │ └── cost: 122.79 + │ └── [] + │ ├── best: (scan defg@dd,cols=(1,5)) + │ └── cost: 1064.42 + ├── G8: (scan defg@dfg,cols=(1,5)) + │ ├── [ordering: +1] + │ │ ├── best: (scan defg@dfg,cols=(1,5)) + │ │ └── cost: 1084.62 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (scan defg@dfg,cols=(1,5)) + │ │ └── cost: 124.88 + │ └── [] + │ ├── best: (scan defg@dfg,cols=(1,5)) + │ └── cost: 1084.62 + └── G9: (scan defg@df,cols=(1,5)) + ├── [ordering: +1] + │ ├── best: (scan defg@df,cols=(1,5)) + │ └── cost: 1074.52 + ├── [ordering: +1] [limit hint: 104.58] + │ ├── best: (scan defg@df,cols=(1,5)) + │ └── cost: 123.83 + └── [] + ├── best: (scan defg@df,cols=(1,5)) + └── cost: 1074.52 + +# Generates an index scan on df and an index join to get all columns. +opt expect=GenerateLimitedTopKScans +SELECT d, f, e FROM defg ORDER BY d, f, e LIMIT 10 +---- +top-k + ├── columns: d:1 f:3 e:2 + ├── internal-ordering: +1,+3,+2 + ├── k: 10 + ├── cardinality: [0 - 10] + ├── ordering: +1,+3,+2 + └── index-join defg + ├── columns: d:1 e:2 f:3 + ├── ordering: +1,+3 + ├── limit hint: 100.00 + └── scan defg@df + ├── columns: d:1 f:3 rowid:5!null + ├── key: (5) + ├── fd: (5)-->(1,3) + ├── ordering: +1,+3 + └── limit hint: 100.00 + +# Does not generate a limited top K scan because the order by columns are covered. +opt expect-not=GenerateLimitedTopKScans +SELECT d FROM defg ORDER BY d LIMIT 10 +---- +scan defg@dd + ├── columns: d:1 + ├── limit: 10 + └── ordering: +1 + +# First order column is not the first column in an index. +opt expect-not=GenerateLimitedTopKScans +SELECT * FROM defg ORDER BY f LIMIT 10 +---- +top-k + ├── columns: d:1 e:2 f:3 g:4 + ├── internal-ordering: +3 + ├── k: 10 + ├── cardinality: [0 - 10] + ├── ordering: +3 + └── scan defg + └── columns: d:1 e:2 f:3 g:4 + +# --------------------------------------------------- +# GeneratePartialOrderTopK +# --------------------------------------------------- + +# Index orderings dd, dfg, and df can be used. +memo expect=GeneratePartialOrderTopK +SELECT * FROM defg ORDER BY d, f, e LIMIT 10 +---- +memo (optimized, ~12KB, required=[presentation: d:1,e:2,f:3,g:4] [ordering: +1,+3,+2]) + ├── G1: (limit G2 G3 ordering=+1,+3,+2) (top-k G2 &{10 +1,+3,+2 }) (top-k G4 &{10 +1,+3,+2 }) (top-k G5 &{10 +1,+3,+2 }) (top-k G6 &{10 +1,+3,+2 }) (top-k G2 &{10 +1,+3,+2 +1,+3}) (top-k G4 &{10 +1,+3,+2 +1}) (top-k G5 &{10 +1,+3,+2 +1,+3}) (top-k G6 &{10 +1,+3,+2 +1,+3}) (top-k G4 &{10 +1,+3,+2 +1,+3}) + │ ├── [presentation: d:1,e:2,f:3,g:4] [ordering: +1,+3,+2] + │ │ ├── best: (top-k G6="[ordering: +1,+3] [limit hint: 100.00]" &{10 +1,+3,+2 +1,+3}) + │ │ └── cost: 737.57 + │ └── [] + │ ├── best: (top-k G2 &{10 +1,+3,+2 }) + │ └── cost: 1206.52 + ├── G2: (scan defg,cols=(1-4)) + │ ├── [ordering: +1,+3,+2] [limit hint: 10.00] + │ │ ├── best: (sort G2) + │ │ └── cost: 1386.46 + │ ├── [ordering: +1,+3] + │ │ ├── best: (sort G2) + │ │ └── cost: 1385.37 + │ ├── [ordering: +1,+3] [limit hint: 100.00] + │ │ ├── best: (sort G2) + │ │ └── cost: 1385.37 + │ └── [] + │ ├── best: (scan defg,cols=(1-4)) + │ └── cost: 1114.92 + ├── G3: (const 10) + ├── G4: (index-join G7 defg,cols=(1-4)) + │ ├── [ordering: +1,+3] + │ │ ├── best: (sort G4="[ordering: +1]") + │ │ └── cost: 7290.90 + │ ├── [ordering: +1,+3] [limit hint: 100.00] + │ │ ├── best: (sort G4="[ordering: +1]") + │ │ └── cost: 7290.90 + │ ├── [ordering: +1] + │ │ ├── best: (index-join G7="[ordering: +1]" defg,cols=(1-4)) + │ │ └── cost: 7154.44 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (index-join G7="[ordering: +1] [limit hint: 104.58]" defg,cols=(1-4)) + │ │ └── cost: 1340.81 + │ └── [] + │ ├── best: (index-join G7 defg,cols=(1-4)) + │ └── cost: 7154.44 + ├── G5: (index-join G8 defg,cols=(1-4)) + │ ├── [ordering: +1,+3] + │ │ ├── best: (index-join G8="[ordering: +1,+3]" defg,cols=(1-4)) + │ │ └── cost: 7174.84 + │ ├── [ordering: +1,+3] [limit hint: 100.00] + │ │ ├── best: (index-join G8="[ordering: +1,+3] [limit hint: 100.00]" defg,cols=(1-4)) + │ │ └── cost: 729.04 + │ └── [] + │ ├── best: (index-join G8 defg,cols=(1-4)) + │ └── cost: 7174.84 + ├── G6: (index-join G9 defg,cols=(1-4)) + │ ├── [ordering: +1,+3] + │ │ ├── best: (index-join G9="[ordering: +1,+3]" defg,cols=(1-4)) + │ │ └── cost: 7164.64 + │ ├── [ordering: +1,+3] [limit hint: 100.00] + │ │ ├── best: (index-join G9="[ordering: +1,+3] [limit hint: 100.00]" defg,cols=(1-4)) + │ │ └── cost: 728.04 + │ └── [] + │ ├── best: (index-join G9 defg,cols=(1-4)) + │ └── cost: 7164.64 + ├── G7: (scan defg@dd,cols=(1,5)) + │ ├── [ordering: +1] + │ │ ├── best: (scan defg@dd,cols=(1,5)) + │ │ └── cost: 1064.42 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (scan defg@dd,cols=(1,5)) + │ │ └── cost: 122.79 + │ └── [] + │ ├── best: (scan defg@dd,cols=(1,5)) + │ └── cost: 1064.42 + ├── G8: (scan defg@dfg,cols=(1,3-5)) + │ ├── [ordering: +1,+3] + │ │ ├── best: (scan defg@dfg,cols=(1,3-5)) + │ │ └── cost: 1104.82 + │ ├── [ordering: +1,+3] [limit hint: 100.00] + │ │ ├── best: (scan defg@dfg,cols=(1,3-5)) + │ │ └── cost: 122.02 + │ └── [] + │ ├── best: (scan defg@dfg,cols=(1,3-5)) + │ └── cost: 1104.82 + └── G9: (scan defg@df,cols=(1,3,5)) + ├── [ordering: +1,+3] + │ ├── best: (scan defg@df,cols=(1,3,5)) + │ └── cost: 1084.62 + ├── [ordering: +1,+3] [limit hint: 100.00] + │ ├── best: (scan defg@df,cols=(1,3,5)) + │ └── cost: 120.02 + └── [] + ├── best: (scan defg@df,cols=(1,3,5)) + └── cost: 1084.62 + +# Only index ordering dfg can be used for the topk. +memo expect=GeneratePartialOrderTopK disable=GenerateLimitedTopKScans +SELECT d, f, g FROM defg ORDER BY d, g LIMIT 10 +---- +memo (optimized, ~4KB, required=[presentation: d:1,f:3,g:4] [ordering: +1,+4]) + ├── G1: (limit G2 G3 ordering=+1,+4) (top-k G2 &{10 +1,+4 }) (top-k G2 &{10 +1,+4 +1}) + │ ├── [presentation: d:1,f:3,g:4] [ordering: +1,+4] + │ │ ├── best: (top-k G2="[ordering: +1] [limit hint: 104.58]" &{10 +1,+4 +1}) + │ │ └── cost: 135.73 + │ └── [] + │ ├── best: (top-k G2 &{10 +1,+4 }) + │ └── cost: 1185.79 + ├── G2: (scan defg,cols=(1,3,4)) (scan defg@dfg,cols=(1,3,4)) + │ ├── [ordering: +1,+4] [limit hint: 10.00] + │ │ ├── best: (sort G2="[ordering: +1]") + │ │ └── cost: 1221.18 + │ ├── [ordering: +1] + │ │ ├── best: (scan defg@dfg,cols=(1,3,4)) + │ │ └── cost: 1094.72 + │ ├── [ordering: +1] [limit hint: 104.58] + │ │ ├── best: (scan defg@dfg,cols=(1,3,4)) + │ │ └── cost: 125.92 + │ └── [] + │ ├── best: (scan defg@dfg,cols=(1,3,4)) + │ └── cost: 1094.72 + └── G3: (const 10) + +# Ensure that we don't incorrectly use orderings that don't match the direction. +memo expect-not=GeneratePartialOrderTopK +SELECT * FROM defg ORDER BY g DESC LIMIT 10 +---- +memo (optimized, ~3KB, required=[presentation: d:1,e:2,f:3,g:4] [ordering: -4]) + ├── G1: (limit G2 G3 ordering=-4) (top-k G2 &{10 -4 }) + │ ├── [presentation: d:1,e:2,f:3,g:4] [ordering: -4] + │ │ ├── best: (top-k G2 &{10 -4 }) + │ │ └── cost: 1201.77 + │ └── [] + │ ├── best: (top-k G2 &{10 -4 }) + │ └── cost: 1201.77 + ├── G2: (scan defg,cols=(1-4)) + │ ├── [ordering: -4] [limit hint: 10.00] + │ │ ├── best: (sort G2) + │ │ └── cost: 1374.40 + │ └── [] + │ ├── best: (scan defg,cols=(1-4)) + │ └── cost: 1114.92 + └── G3: (const 10) + +# No index matches. +opt expect-not=GeneratePartialOrderTopK +SELECT * FROM defg ORDER BY e LIMIT 10 +---- +top-k + ├── columns: d:1 e:2 f:3 g:4 + ├── internal-ordering: +2 + ├── k: 10 + ├── cardinality: [0 - 10] + ├── ordering: +2 + └── scan defg + └── columns: d:1 e:2 f:3 g:4 diff --git a/pkg/sql/opt/xform/testdata/rules/scan b/pkg/sql/opt/xform/testdata/rules/scan index 9f412475f4ab..687b097c915e 100644 --- a/pkg/sql/opt/xform/testdata/rules/scan +++ b/pkg/sql/opt/xform/testdata/rules/scan @@ -48,7 +48,7 @@ memo SELECT k,f FROM a ORDER BY k DESC LIMIT 10 ---- memo (optimized, ~4KB, required=[presentation: k:1,f:3] [ordering: -1]) - ├── G1: (limit G2 G3 ordering=-1) (scan a,rev,cols=(1,3),lim=10(rev)) (top-k G2 &{10 -1}) + ├── G1: (limit G2 G3 ordering=-1) (scan a,rev,cols=(1,3),lim=10(rev)) (top-k G2 &{10 -1 }) │ ├── [presentation: k:1,f:3] [ordering: -1] │ │ ├── best: (scan a,rev,cols=(1,3),lim=10(rev)) │ │ └── cost: 15.04