opt: move splitScanIntoUnionScans to general_funcs.go

This commit moves splitScanIntoUnionScans and all helper functions into general_funcs.go since it's now used in both limit_funcs.go and groupby_funcs.go. This is a separate commit for ease of reviewing. Release note: None
cockroachdb · May 19, 2021 · 35b1ec6 · 35b1ec6
1 parent b033f6c
commit 35b1ec6
Show file tree

Hide file tree

Showing 3 changed files with 267 additions and 265 deletions.
diff --git a/pkg/sql/opt/xform/general_funcs.go b/pkg/sql/opt/xform/general_funcs.go
@@ -16,10 +16,13 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/constraint"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/idxconstraint"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/ordering"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/partialidx"
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 	"github.com/cockroachdb/cockroach/pkg/util"
 	"github.com/cockroachdb/errors"
@@ -474,3 +477,265 @@ func getSortedPrefixes(index cat.Index, localPartitions util.FastIntSet) []prefi
 	sort.Sort(allPrefixes)
 	return allPrefixes
 }
+
+// splitScanIntoUnionScans tries to find a UnionAll of Scan operators (with an
+// optional hard limit) that each scan over a single key from the original
+// Scan's constraints. The UnionAll is returned if the scans can provide the
+// given ordering, and if the statistics suggest that splitting the scan could
+// be beneficial. If no such UnionAll of Scans can be found, ok=false is
+// returned. This is beneficial in cases where an ordering is required on a
+// suffix of the index columns, and constraining the first column(s) allows the
+// scan to provide that ordering.
+// TODO(drewk): handle inverted scans.
+func (c *CustomFuncs) splitScanIntoUnionScans(
+	ordering props.OrderingChoice,
+	scan memo.RelExpr,
+	sp *memo.ScanPrivate,
+	cons *constraint.Constraint,
+	limit int,
+	keyPrefixLength int,
+) (_ memo.RelExpr, ok bool) {
+	const maxScanCount = 16
+	const threshold = 4
+
+	keyCtx := constraint.MakeKeyContext(&cons.Columns, c.e.evalCtx)
+	spans := cons.Spans
+
+	// Get the total number of keys that can be extracted from the spans. Also
+	// keep track of the first span which would exceed the Scan budget if it was
+	// used to construct new Scan operators.
+	var keyCount int
+	budgetExceededIndex := spans.Count()
+	additionalScanBudget := maxScanCount
+	for i := 0; i < spans.Count(); i++ {
+		if cnt, ok := spans.Get(i).KeyCount(&keyCtx, keyPrefixLength); ok {
+			keyCount += int(cnt)
+			additionalScanBudget -= int(cnt)
+			if additionalScanBudget < 0 {
+				// Splitting any spans from this span on would lead to exceeding the max
+				// Scan count. Keep track of the index of this span.
+				budgetExceededIndex = i
+			}
+		}
+	}
+	if keyCount <= 0 || (keyCount == 1 && spans.Count() == 1) || budgetExceededIndex == 0 {
+		// Ensure that at least one new Scan will be constructed.
+		return nil, false
+	}
+
+	scanCount := keyCount
+	if scanCount > maxScanCount {
+		// We will construct at most maxScanCount new Scans.
+		scanCount = maxScanCount
+	}
+
+	if limit > 0 {
+		if scan.Relational().Stats.Available &&
+			float64(scanCount*limit*threshold) >= scan.Relational().Stats.RowCount {
+			// Splitting the Scan may not be worth the overhead. Creating a sequence of
+			// Scans and Unions is expensive, so we only want to create the plan if it
+			// is likely to be used.
+			return nil, false
+		}
+	}
+
+	// The index ordering must have a prefix of columns of length keyLength
+	// followed by the ordering columns either in order or in reverse order.
+	hasOrderingSeq, reverse := indexHasOrderingSequence(
+		c.e.mem.Metadata(), scan, sp, ordering, keyPrefixLength)
+	if !hasOrderingSeq {
+		return nil, false
+	}
+	newHardLimit := memo.MakeScanLimit(int64(limit), reverse)
+
+	// makeNewUnion extends the UnionAll tree rooted at 'last' to include
+	// 'newScan'. The ColumnIDs of the original Scan are used by the resulting
+	// expression.
+	makeNewUnion := func(last, newScan memo.RelExpr, outCols opt.ColList) memo.RelExpr {
+		return c.e.f.ConstructUnionAll(last, newScan, &memo.SetPrivate{
+			LeftCols:  last.Relational().OutputCols.ToList(),
+			RightCols: newScan.Relational().OutputCols.ToList(),
+			OutCols:   outCols,
+		})
+	}
+
+	// Attempt to extract single-key spans and use them to construct limited
+	// Scans. Add these Scans to a UnionAll tree. Any remaining spans will be used
+	// to construct a single unlimited Scan, which will also be added to the
+	// UnionAll tree.
+	var noLimitSpans constraint.Spans
+	var last memo.RelExpr
+	for i, n := 0, spans.Count(); i < n; i++ {
+		if i >= budgetExceededIndex {
+			// The Scan budget has been reached; no additional Scans can be created.
+			noLimitSpans.Append(spans.Get(i))
+			continue
+		}
+		singleKeySpans, ok := spans.Get(i).Split(&keyCtx, keyPrefixLength)
+		if !ok {
+			// Single key spans could not be extracted from this span, so add it to
+			// the set of spans that will be used to construct an unlimited Scan.
+			noLimitSpans.Append(spans.Get(i))
+			continue
+		}
+		for j, m := 0, singleKeySpans.Count(); j < m; j++ {
+			if last == nil {
+				// This is the first limited Scan, so no UnionAll necessary.
+				last = c.makeNewScan(sp, cons.Columns, newHardLimit, singleKeySpans.Get(j))
+				continue
+			}
+			// Construct a new Scan for each span.
+			newScan := c.makeNewScan(sp, cons.Columns, newHardLimit, singleKeySpans.Get(j))
+
+			// Add the scan to the union tree. If it is the final union in the
+			// tree, use the original scan's columns as the union's out columns.
+			// Otherwise, create new output column IDs for the union.
+			var outCols opt.ColList
+			finalUnion := i == n-1 && j == m-1 && noLimitSpans.Count() == 0
+			if finalUnion {
+				outCols = sp.Cols.ToList()
+			} else {
+				_, cols := c.DuplicateColumnIDs(sp.Table, sp.Cols)
+				outCols = cols.ToList()
+			}
+			last = makeNewUnion(last, newScan, outCols)
+		}
+	}
+	if noLimitSpans.Count() == spans.Count() {
+		// Expect to generate at least one new limited single-key Scan. This could
+		// happen if a valid key count could be obtained for at least span, but no
+		// span could be split into single-key spans.
+		return nil, false
+	}
+	if noLimitSpans.Count() == 0 {
+		// All spans could be used to generate limited Scans.
+		return last, true
+	}
+
+	// If any spans could not be used to generate limited Scans, use them to
+	// construct an unlimited Scan and add it to the UnionAll tree.
+	newScanPrivate := c.DuplicateScanPrivate(sp)
+	newScanPrivate.SetConstraint(c.e.evalCtx, &constraint.Constraint{
+		Columns: sp.Constraint.Columns.RemapColumns(sp.Table, newScanPrivate.Table),
+		Spans:   noLimitSpans,
+	})
+	newScan := c.e.f.ConstructScan(newScanPrivate)
+	return makeNewUnion(last, newScan, sp.Cols.ToList()), true
+}
+
+// indexHasOrderingSequence returns whether the Scan can provide a given
+// ordering under the assumption that we are scanning a single-key span with the
+// given keyLength (and if so, whether we need to scan it in reverse).
+// For example:
+//
+// index: +1/-2/+3,
+// limitOrdering: -2/+3,
+// keyLength: 1,
+// =>
+// hasSequence: True, reverse: False
+//
+// index: +1/-2/+3,
+// limitOrdering: +2/-3,
+// keyLength: 1,
+// =>
+// hasSequence: True, reverse: True
+//
+// index: +1/-2/+3/+4,
+// limitOrdering: +3/+4,
+// keyLength: 1,
+// =>
+// hasSequence: False, reverse: False
+//
+func indexHasOrderingSequence(
+	md *opt.Metadata,
+	scan memo.RelExpr,
+	sp *memo.ScanPrivate,
+	limitOrdering props.OrderingChoice,
+	keyLength int,
+) (hasSequence, reverse bool) {
+	tableMeta := md.TableMeta(sp.Table)
+	index := tableMeta.Table.Index(sp.Index)
+
+	if keyLength > index.ColumnCount() {
+		// The key contains more columns than the index. The limit ordering sequence
+		// cannot be part of the index ordering.
+		return false, false
+	}
+
+	// Create a copy of the Scan's FuncDepSet, and add the first 'keyCount'
+	// columns from the index as constant columns. The columns are constant
+	// because the span contains only a single key on those columns.
+	var fds props.FuncDepSet
+	fds.CopyFrom(&scan.Relational().FuncDeps)
+	prefixCols := opt.ColSet{}
+	for i := 0; i < keyLength; i++ {
+		col := sp.Table.IndexColumnID(index, i)
+		prefixCols.Add(col)
+	}
+	fds.AddConstants(prefixCols)
+
+	// Use fds to simplify a copy of the limit ordering; the prefix columns will
+	// become part of the optional ColSet.
+	requiredOrdering := limitOrdering.Copy()
+	requiredOrdering.Simplify(&fds)
+
+	// If the ScanPrivate can satisfy requiredOrdering, it must return columns
+	// ordered by a prefix of length keyLength, followed by the columns of
+	// limitOrdering.
+	return ordering.ScanPrivateCanProvide(md, sp, &requiredOrdering)
+}
+
+// makeNewScan constructs a new Scan operator with a new TableID and the given
+// limit and span. All ColumnIDs and references to those ColumnIDs are
+// replaced with new ones from the new TableID. All other fields are simply
+// copied from the old ScanPrivate.
+func (c *CustomFuncs) makeNewScan(
+	sp *memo.ScanPrivate,
+	columns constraint.Columns,
+	newHardLimit memo.ScanLimit,
+	span *constraint.Span,
+) memo.RelExpr {
+	newScanPrivate := c.DuplicateScanPrivate(sp)
+
+	// duplicateScanPrivate does not initialize the Constraint or HardLimit
+	// fields, so we do that now.
+	newScanPrivate.HardLimit = newHardLimit
+
+	// Construct the new Constraint field with the given span and remapped
+	// ordering columns.
+	var newSpans constraint.Spans
+	newSpans.InitSingleSpan(span)
+	newConstraint := &constraint.Constraint{
+		Columns: columns.RemapColumns(sp.Table, newScanPrivate.Table),
+		Spans:   newSpans,
+	}
+	newScanPrivate.SetConstraint(c.e.evalCtx, newConstraint)
+
+	return c.e.f.ConstructScan(newScanPrivate)
+}
+
+// getKnownScanConstraint returns a Constraint that is known to hold true for
+// the output of the Scan operator with the given ScanPrivate. If the
+// ScanPrivate has a Constraint, the scan Constraint is returned. Otherwise, an
+// effort is made to retrieve a Constraint from the underlying table's check
+// constraints. getKnownScanConstraint assumes that the scan is not inverted.
+func (c *CustomFuncs) getKnownScanConstraint(
+	sp *memo.ScanPrivate,
+) (cons *constraint.Constraint, found bool) {
+	if sp.Constraint != nil {
+		// The ScanPrivate has a constraint, so return it.
+		cons = sp.Constraint
+	} else {
+		// Build a constraint set with the check constraints of the underlying
+		// table.
+		filters := c.checkConstraintFilters(sp.Table)
+		instance := c.initIdxConstraintForIndex(
+			nil, /* requiredFilters */
+			filters,
+			sp.Table,
+			sp.Index,
+		)
+		cons = instance.Constraint()
+	}
+	return cons, !cons.IsUnconstrained()
+}
diff --git a/pkg/sql/opt/xform/groupby_funcs.go b/pkg/sql/opt/xform/groupby_funcs.go
@@ -252,7 +252,7 @@ func (c *CustomFuncs) SplitGroupByScanIntoUnionScans(
 	// GroupingPrivate (if no such UnionAll is possible this will return
 	// ok=false). We pass a limit of 0 since the scans are unlimited
 	// (splitScanIntoUnionScans is also used for another rule with limited scans).
-	return c.SplitScanIntoUnionScans(
+	return c.splitScanIntoUnionScans(
 		intraOrd, scan, sp, cons, 0 /* limit */, keyPrefixLength,
 	)
 }