Merge #62406

62406: opt: produce constant columns in predicate from partial index scan r=mgartner a=mgartner

#### opt: refactor indexScanBuilder

This commit refactors `indexScanBuilder` to make it safer to use and maintain.

- Functions that are intended for use outside of the struct's implementation have been capitalized to make this more apparent.
- Helper functions that determine the state of the builder have been added to simplify the internal logic of the builder.
- `primaryKeyCols` was removed because it is unrelated to building scans.

Release note: None

#### opt: produce constant columns in predicate from partial index scan

`GenerateConstrainedScans` and `GeneratePartialIndexScans` now build Project expressions to produce columns that are held constant in partial index predicates when possible. This allows a partial index to cover columns and avoid IndexJoins in more cases.

A column held constant in a partial index predicate is only projected when:

1. The index does not include the column.
2. All columns can be covered because they are either in the index or held constant by the predicate.

In other words, a Project is only built when doing so eliminates an IndexJoin.

Resolves #60532

Release note (performance improvement): Columns that are held constant in partial index predicates can now be produced when scanning the partial index. This eliminates unnecessary primary index joins to retrieve those constant columns in some queries, resulting in lower latency.

Co-authored-by: Marcus Gartner <[email protected]>
cockroachdb · Mar 26, 2021 · 3cfe2a3 · 3cfe2a3
2 parents bd6b86d + 3bb6aa1
commit 3cfe2a3
Show file tree

Hide file tree

Showing 10 changed files with 874 additions and 382 deletions.
diff --git a/pkg/sql/opt/memo/testdata/stats/partial-index-scan b/pkg/sql/opt/memo/testdata/stats/partial-index-scan
@@ -430,17 +430,20 @@ CREATE TABLE t (
 opt
 SELECT * FROM t WHERE pk2 = 1 AND b1 = false AND b2 = false
 ----
-index-join t
+project
  ├── columns: pk1:1(int!null) pk2:2(int!null) b1:3(bool!null) b2:4(bool!null)
  ├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
  ├── key: (1)
  ├── fd: ()-->(2-4)
- └── scan t@secondary,partial
-      ├── columns: pk1:1(int!null) pk2:2(int!null)
-      ├── constraint: /2/1: [/1 - /1]
-      ├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
-      ├── key: (1)
-      └── fd: ()-->(2)
+ ├── scan t@secondary,partial
+ │    ├── columns: pk1:1(int!null) pk2:2(int!null)
+ │    ├── constraint: /2/1: [/1 - /1]
+ │    ├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
+ │    ├── key: (1)
+ │    └── fd: ()-->(2)
+ └── projections
+      ├── false [as=b1:3, type=bool]
+      └── false [as=b2:4, type=bool]
 
 
 # ---------------------
@@ -701,7 +704,7 @@ CREATE INDEX idx ON hist (i) WHERE i > 100 AND i <= 200 AND s = 'banana'
 opt
 SELECT * FROM hist WHERE i > 125 AND i < 150 AND s = 'banana'
 ----
-index-join hist
+project
  ├── columns: k:1(int!null) i:2(int!null) s:3(string!null)
  ├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
  │   histogram(2)=  0   0   6.6262 0.2881
@@ -710,16 +713,18 @@ index-join hist
  │                <--- 'banana'
  ├── key: (1)
  ├── fd: ()-->(3), (1)-->(2)
- └── scan hist@idx,partial
-      ├── columns: k:1(int!null) i:2(int!null)
-      ├── constraint: /2/1: [/126 - /149]
-      ├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
-      │   histogram(2)=  0   0   6.6262 0.2881
-      │                <--- 125 -------- 149 -
-      │   histogram(3)=  0   6.9143
-      │                <--- 'banana'
-      ├── key: (1)
-      └── fd: (1)-->(2)
+ ├── scan hist@idx,partial
+ │    ├── columns: k:1(int!null) i:2(int!null)
+ │    ├── constraint: /2/1: [/126 - /149]
+ │    ├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
+ │    │   histogram(2)=  0   0   6.6262 0.2881
+ │    │                <--- 125 -------- 149 -
+ │    │   histogram(3)=  0   6.9143
+ │    │                <--- 'banana'
+ │    ├── key: (1)
+ │    └── fd: (1)-->(2)
+ └── projections
+      └── 'banana' [as=s:3, type=string]
 
 exec-ddl
 DROP INDEX idx

diff --git a/pkg/sql/opt/xform/index_scan_builder.go b/pkg/sql/opt/xform/index_scan_builder.go
@@ -13,7 +13,6 @@ package xform
 import (
 	"github.com/cockroachdb/cockroach/pkg/sql/inverted"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt"
-	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
 	"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
@@ -32,26 +31,27 @@ import (
 // make the following calls:
 //
 //   var sb indexScanBuilder
-//   sb.init(c, tabID)
-//   sb.setScan(scanPrivate)
-//   sb.addSelect(filters)
-//   sb.addIndexJoin(cols)
-//   expr := sb.build()
+//   sb.Init(c, tabID)
+//   sb.SetScan(scanPrivate)
+//   sb.AddSelect(filters)
+//   sb.AddIndexJoin(cols)
+//   expr := sb.Build()
 //
 type indexScanBuilder struct {
 	c                     *CustomFuncs
 	f                     *norm.Factory
 	mem                   *memo.Memo
 	tabID                 opt.TableID
-	pkCols                opt.ColSet
 	scanPrivate           memo.ScanPrivate
+	constProjections      memo.ProjectionsExpr
 	innerFilters          memo.FiltersExpr
 	outerFilters          memo.FiltersExpr
 	invertedFilterPrivate memo.InvertedFilterPrivate
 	indexJoinPrivate      memo.IndexJoinPrivate
 }
 
-func (b *indexScanBuilder) init(c *CustomFuncs, tabID opt.TableID) {
+// Init initializes an indexScanBuilder.
+func (b *indexScanBuilder) Init(c *CustomFuncs, tabID opt.TableID) {
 	// This initialization pattern ensures that fields are not unwittingly
 	// reused. Field reuse must be explicit.
 	*b = indexScanBuilder{
@@ -62,42 +62,46 @@ func (b *indexScanBuilder) init(c *CustomFuncs, tabID opt.TableID) {
 	}
 }
 
-// primaryKeyCols returns the columns from the scanned table's primary index.
-func (b *indexScanBuilder) primaryKeyCols() opt.ColSet {
-	// Ensure that pkCols set is initialized with the primary index columns.
-	if b.pkCols.Empty() {
-		primaryIndex := b.c.e.mem.Metadata().Table(b.tabID).Index(cat.PrimaryIndex)
-		for i, cnt := 0, primaryIndex.KeyColumnCount(); i < cnt; i++ {
-			b.pkCols.Add(b.tabID.IndexColumnID(primaryIndex, i))
-		}
+// SetScan constructs a standalone Scan expression. As a side effect, it clears
+// any expressions added during previous invocations of the builder. SetScan
+// makes a copy of scanPrivate so that it doesn't escape.
+func (b *indexScanBuilder) SetScan(scanPrivate *memo.ScanPrivate) {
+	*b = indexScanBuilder{
+		c:           b.c,
+		f:           b.f,
+		mem:         b.mem,
+		tabID:       b.tabID,
+		scanPrivate: *scanPrivate,
 	}
-	return b.pkCols
 }
 
-// setScan constructs a standalone Scan expression. As a side effect, it clears
-// any expressions added during previous invocations of the builder. setScan
-// makes a copy of scanPrivate so that it doesn't escape.
-func (b *indexScanBuilder) setScan(scanPrivate *memo.ScanPrivate) {
-	b.scanPrivate = *scanPrivate
-	b.innerFilters = nil
-	b.outerFilters = nil
-	b.invertedFilterPrivate = memo.InvertedFilterPrivate{}
-	b.indexJoinPrivate = memo.IndexJoinPrivate{}
+// AddConstProjections wraps the input expression with a Project expression with
+// the given constant projection expressions.
+func (b *indexScanBuilder) AddConstProjections(proj memo.ProjectionsExpr) {
+	if len(proj) != 0 {
+		if b.hasConstProjections() {
+			panic(errors.AssertionFailedf("cannot call AddConstProjections twice"))
+		}
+		if b.hasInnerFilters() || b.hasOuterFilters() {
+			panic(errors.AssertionFailedf("cannot call AddConstProjections after filters are added"))
+		}
+		b.constProjections = proj
+	}
 }
 
-// addInvertedFilter wraps the input expression with an InvertedFilter
+// AddInvertedFilter wraps the input expression with an InvertedFilter
 // expression having the given span expression.
-func (b *indexScanBuilder) addInvertedFilter(
+func (b *indexScanBuilder) AddInvertedFilter(
 	spanExpr *inverted.SpanExpression,
 	pfState *invertedexpr.PreFiltererStateForInvertedFilterer,
 	invertedCol opt.ColumnID,
 ) {
 	if spanExpr != nil {
-		if b.invertedFilterPrivate.InvertedColumn != 0 {
-			panic(errors.AssertionFailedf("cannot call addInvertedFilter twice"))
+		if b.hasInvertedFilter() {
+			panic(errors.AssertionFailedf("cannot call AddInvertedFilter twice"))
 		}
-		if b.indexJoinPrivate.Table != 0 {
-			panic(errors.AssertionFailedf("cannot add inverted filter after index join is added"))
+		if b.hasIndexJoin() {
+			panic(errors.AssertionFailedf("cannot call AddInvertedFilter after index join is added"))
 		}
 		b.invertedFilterPrivate = memo.InvertedFilterPrivate{
 			InvertedExpression: spanExpr,
@@ -107,30 +111,30 @@ func (b *indexScanBuilder) addInvertedFilter(
 	}
 }
 
-// addSelect wraps the input expression with a Select expression having the
+// AddSelect wraps the input expression with a Select expression having the
 // given filter.
-func (b *indexScanBuilder) addSelect(filters memo.FiltersExpr) {
+func (b *indexScanBuilder) AddSelect(filters memo.FiltersExpr) {
 	if len(filters) != 0 {
-		if b.indexJoinPrivate.Table == 0 {
-			if b.innerFilters != nil {
-				panic(errors.AssertionFailedf("cannot call addSelect methods twice before index join is added"))
+		if !b.hasIndexJoin() {
+			if b.hasInnerFilters() {
+				panic(errors.AssertionFailedf("cannot call AddSelect methods twice before index join is added"))
 			}
 			b.innerFilters = filters
 		} else {
-			if b.outerFilters != nil {
-				panic(errors.AssertionFailedf("cannot call addSelect methods twice after index join is added"))
+			if b.hasOuterFilters() {
+				panic(errors.AssertionFailedf("cannot call AddSelect methods twice after index join is added"))
 			}
 			b.outerFilters = filters
 		}
 	}
 }
 
-// addSelectAfterSplit first splits the given filter into two parts: a filter
+// AddSelectAfterSplit first splits the given filter into two parts: a filter
 // that only involves columns in the given set, and a remaining filter that
 // includes everything else. The filter that is bound by the columns becomes a
 // Select expression that wraps the input expression, and the remaining filter
 // is returned (or nil if there is no remaining filter).
-func (b *indexScanBuilder) addSelectAfterSplit(
+func (b *indexScanBuilder) AddSelectAfterSplit(
 	filters memo.FiltersExpr, cols opt.ColSet,
 ) (remainingFilters memo.FiltersExpr) {
 	if len(filters) == 0 {
@@ -139,7 +143,7 @@ func (b *indexScanBuilder) addSelectAfterSplit(
 
 	if b.c.FiltersBoundBy(filters, cols) {
 		// Filter is fully bound by the cols, so add entire filter.
-		b.addSelect(filters)
+		b.AddSelect(filters)
 		return nil
 	}
 
@@ -152,48 +156,63 @@ func (b *indexScanBuilder) addSelectAfterSplit(
 	}
 
 	// Add conditions which are fully bound by the cols and return the rest.
-	b.addSelect(boundConditions)
+	b.AddSelect(boundConditions)
 	return b.c.ExtractUnboundConditions(filters, cols)
 }
 
-// addIndexJoin wraps the input expression with an IndexJoin expression that
+// AddIndexJoin wraps the input expression with an IndexJoin expression that
 // produces the given set of columns by lookup in the primary index.
-func (b *indexScanBuilder) addIndexJoin(cols opt.ColSet) {
-	if b.indexJoinPrivate.Table != 0 {
-		panic(errors.AssertionFailedf("cannot call addIndexJoin twice"))
+func (b *indexScanBuilder) AddIndexJoin(cols opt.ColSet) {
+	if b.hasIndexJoin() {
+		panic(errors.AssertionFailedf("cannot call AddIndexJoin twice"))
 	}
-	if b.outerFilters != nil {
-		panic(errors.AssertionFailedf("cannot add index join after an outer filter has been added"))
+	if b.hasOuterFilters() {
+		panic(errors.AssertionFailedf("cannot call AddIndexJoin after an outer filter has been added"))
 	}
 	b.indexJoinPrivate = memo.IndexJoinPrivate{
 		Table: b.tabID,
 		Cols:  cols,
 	}
 }
 
-// build constructs the final memo expression by composing together the various
+// Build constructs the final memo expression by composing together the various
 // expressions that were specified by previous calls to various add methods.
-func (b *indexScanBuilder) build(grp memo.RelExpr) {
+func (b *indexScanBuilder) Build(grp memo.RelExpr) {
 	// 1. Only scan.
-	if len(b.innerFilters) == 0 && b.indexJoinPrivate.Table == 0 {
+	if !b.hasConstProjections() && !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() {
 		b.mem.AddScanToGroup(&memo.ScanExpr{ScanPrivate: b.scanPrivate}, grp)
 		return
 	}
 
-	// 2. Wrap scan in inner filter if it was added.
 	input := b.f.ConstructScan(&b.scanPrivate)
-	if len(b.innerFilters) != 0 {
-		if b.indexJoinPrivate.Table == 0 && b.invertedFilterPrivate.InvertedColumn == 0 {
+
+	// 2. Wrap input in a Project if constant projections were added.
+	if b.hasConstProjections() {
+		if !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() {
+			b.mem.AddProjectToGroup(&memo.ProjectExpr{
+				Input:       input,
+				Projections: b.constProjections,
+				Passthrough: b.scanPrivate.Cols,
+			}, grp)
+			return
+		}
+
+		input = b.f.ConstructProject(input, b.constProjections, b.scanPrivate.Cols)
+	}
+
+	// 3. Wrap input in inner filter if it was added.
+	if b.hasInnerFilters() {
+		if !b.hasInvertedFilter() && !b.hasIndexJoin() {
 			b.mem.AddSelectToGroup(&memo.SelectExpr{Input: input, Filters: b.innerFilters}, grp)
 			return
 		}
 
 		input = b.f.ConstructSelect(input, b.innerFilters)
 	}
 
-	// 3. Wrap input in inverted filter if it was added.
-	if b.invertedFilterPrivate.InvertedColumn != 0 {
-		if b.indexJoinPrivate.Table == 0 {
+	// 4. Wrap input in inverted filter if it was added.
+	if b.hasInvertedFilter() {
+		if !b.hasIndexJoin() {
 			invertedFilter := &memo.InvertedFilterExpr{
 				Input: input, InvertedFilterPrivate: b.invertedFilterPrivate,
 			}
@@ -204,9 +223,9 @@ func (b *indexScanBuilder) build(grp memo.RelExpr) {
 		input = b.f.ConstructInvertedFilter(input, &b.invertedFilterPrivate)
 	}
 
-	// 4. Wrap input in index join if it was added.
-	if b.indexJoinPrivate.Table != 0 {
-		if len(b.outerFilters) == 0 {
+	// 5. Wrap input in index join if it was added.
+	if b.hasIndexJoin() {
+		if !b.hasOuterFilters() {
 			indexJoin := &memo.IndexJoinExpr{Input: input, IndexJoinPrivate: b.indexJoinPrivate}
 			b.mem.AddIndexJoinToGroup(indexJoin, grp)
 			return
@@ -215,11 +234,38 @@ func (b *indexScanBuilder) build(grp memo.RelExpr) {
 		input = b.f.ConstructIndexJoin(input, &b.indexJoinPrivate)
 	}
 
-	// 5. Wrap input in outer filter (which must exist at this point).
-	if len(b.outerFilters) == 0 {
-		// indexJoinDef == 0: outerFilters == 0 handled by #1 and #2 above.
-		// indexJoinDef != 0: outerFilters == 0 handled by #3 above.
+	// 6. Wrap input in outer filter (which must exist at this point).
+	if !b.hasOuterFilters() {
+		// No index join added: missing outer filters are handled by #1-4 above.
+		// Index join added: missing outer filters are handled by #5 above.
 		panic(errors.AssertionFailedf("outer filter cannot be 0 at this point"))
 	}
 	b.mem.AddSelectToGroup(&memo.SelectExpr{Input: input, Filters: b.outerFilters}, grp)
 }
+
+// hasConstProjections returns true if constant projections have been added to
+// the builder.
+func (b *indexScanBuilder) hasConstProjections() bool {
+	return len(b.constProjections) != 0
+}
+
+// hasInnerFilters returns true if inner filters have been added to the builder.
+func (b *indexScanBuilder) hasInnerFilters() bool {
+	return len(b.innerFilters) != 0
+}
+
+// hasOuterFilters returns true if outer filters have been added to the builder.
+func (b *indexScanBuilder) hasOuterFilters() bool {
+	return len(b.outerFilters) != 0
+}
+
+// hasInvertedFilter returns true if inverted filters have been added to the
+// builder.
+func (b *indexScanBuilder) hasInvertedFilter() bool {
+	return b.invertedFilterPrivate.InvertedColumn != 0
+}
+
+// hasIndexJoin returns true if an index join has been added to the builder.
+func (b *indexScanBuilder) hasIndexJoin() bool {
+	return b.indexJoinPrivate.Table != 0
+}
diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go
@@ -229,8 +229,8 @@ func (c *CustomFuncs) GenerateLookupJoins(
 
 	var pkCols opt.ColList
 	var iter scanIndexIter
-	iter.Init(c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes)
-	iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
+	iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes)
+	iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool, constProj memo.ProjectionsExpr) {
 		// Find the longest prefix of index key columns that are constrained by
 		// an equality with another column or a constant.
 		numIndexKeyCols := index.LaxKeyColumnCount()
@@ -364,7 +364,11 @@ func (c *CustomFuncs) GenerateLookupJoins(
 		lookupJoin.Cols = lookupJoin.LookupExpr.OuterCols()
 		lookupJoin.Cols.UnionWith(inputProps.OutputCols)
 
-		if isCovering {
+		// TODO(mgartner): The right side of the join can "produce" columns held
+		// constant by a partial index predicate, but the lookup joiner does not
+		// currently support this. For now, if constProj is non-empty we
+		// consider the index non-covering.
+		if isCovering && len(constProj) == 0 {
 			// Case 1 (see function comment).
 			lookupJoin.Cols.UnionWith(scanPrivate.Cols)
 
@@ -639,8 +643,8 @@ func (c *CustomFuncs) GenerateInvertedJoins(
 	var optionalFilters memo.FiltersExpr
 
 	var iter scanIndexIter
-	iter.Init(c.e.mem, &c.im, scanPrivate, on, rejectNonInvertedIndexes)
-	iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
+	iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectNonInvertedIndexes)
+	iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) {
 		invertedJoin := memo.InvertedJoinExpr{Input: input}
 		numPrefixCols := index.NonInvertedPrefixColumnCount()