Skip to content

Commit

Permalink
Merge #62406
Browse files Browse the repository at this point in the history
62406: opt: produce constant columns in predicate from partial index scan r=mgartner a=mgartner

#### opt: refactor indexScanBuilder

This commit refactors `indexScanBuilder` to make it safer to use and
maintain.

  - Functions that are intended for use outside of the struct's
    implementation have been capitalized to make this more apparent.
  - Helper functions that determine the state of the builder have been
    added to simplify the internal logic of the builder.
  - `primaryKeyCols` was removed because it is unrelated to building
    scans.

Release note: None

#### opt: produce constant columns in predicate from partial index scan

`GenerateConstrainedScans` and `GeneratePartialIndexScans` now build
Project expressions to produce columns that are held constant in partial
index predicates when possible. This allows a partial index to cover
columns and avoid IndexJoins in more cases.

A column held constant in a partial index predicate is only projected
when:

  1. The index does not include the column.
  2. All columns can be covered because they are either in the index or
     held constant by the predicate. In other words, a Project is only
     built when doing so eliminates an IndexJoin.

Resolves #60532

Release note (performance improvement): Columns that are held constant
in partial index predicates can now be produced when scanning the
partial index. This eliminates unnecessary primary index joins to
retrieve those constant columns in some queries, resulting in lower
latency.


Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
craig[bot] and mgartner committed Mar 26, 2021
2 parents bd6b86d + 3bb6aa1 commit 3cfe2a3
Show file tree
Hide file tree
Showing 10 changed files with 874 additions and 382 deletions.
41 changes: 23 additions & 18 deletions pkg/sql/opt/memo/testdata/stats/partial-index-scan
Original file line number Diff line number Diff line change
Expand Up @@ -430,17 +430,20 @@ CREATE TABLE t (
opt
SELECT * FROM t WHERE pk2 = 1 AND b1 = false AND b2 = false
----
index-join t
project
├── columns: pk1:1(int!null) pk2:2(int!null) b1:3(bool!null) b2:4(bool!null)
├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
├── key: (1)
├── fd: ()-->(2-4)
└── scan t@secondary,partial
├── columns: pk1:1(int!null) pk2:2(int!null)
├── constraint: /2/1: [/1 - /1]
├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
├── key: (1)
└── fd: ()-->(2)
├── scan t@secondary,partial
│ ├── columns: pk1:1(int!null) pk2:2(int!null)
│ ├── constraint: /2/1: [/1 - /1]
│ ├── stats: [rows=1.245025, distinct(2)=1, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=1, null(4)=0, distinct(2-4)=1, null(2-4)=0]
│ ├── key: (1)
│ └── fd: ()-->(2)
└── projections
├── false [as=b1:3, type=bool]
└── false [as=b2:4, type=bool]


# ---------------------
Expand Down Expand Up @@ -701,7 +704,7 @@ CREATE INDEX idx ON hist (i) WHERE i > 100 AND i <= 200 AND s = 'banana'
opt
SELECT * FROM hist WHERE i > 125 AND i < 150 AND s = 'banana'
----
index-join hist
project
├── columns: k:1(int!null) i:2(int!null) s:3(string!null)
├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
│ histogram(2)= 0 0 6.6262 0.2881
Expand All @@ -710,16 +713,18 @@ index-join hist
│ <--- 'banana'
├── key: (1)
├── fd: ()-->(3), (1)-->(2)
└── scan hist@idx,partial
├── columns: k:1(int!null) i:2(int!null)
├── constraint: /2/1: [/126 - /149]
├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
│ histogram(2)= 0 0 6.6262 0.2881
│ <--- 125 -------- 149 -
│ histogram(3)= 0 6.9143
│ <--- 'banana'
├── key: (1)
└── fd: (1)-->(2)
├── scan hist@idx,partial
│ ├── columns: k:1(int!null) i:2(int!null)
│ ├── constraint: /2/1: [/126 - /149]
│ ├── stats: [rows=6.91433927, distinct(2)=3.09090909, null(2)=0, distinct(3)=1, null(3)=0, distinct(2,3)=3.09090909, null(2,3)=0]
│ │ histogram(2)= 0 0 6.6262 0.2881
│ │ <--- 125 -------- 149 -
│ │ histogram(3)= 0 6.9143
│ │ <--- 'banana'
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── projections
└── 'banana' [as=s:3, type=string]

exec-ddl
DROP INDEX idx
Expand Down
176 changes: 111 additions & 65 deletions pkg/sql/opt/xform/index_scan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ package xform
import (
"github.com/cockroachdb/cockroach/pkg/sql/inverted"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
"github.com/cockroachdb/cockroach/pkg/sql/opt/invertedexpr"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/norm"
Expand All @@ -32,26 +31,27 @@ import (
// make the following calls:
//
// var sb indexScanBuilder
// sb.init(c, tabID)
// sb.setScan(scanPrivate)
// sb.addSelect(filters)
// sb.addIndexJoin(cols)
// expr := sb.build()
// sb.Init(c, tabID)
// sb.SetScan(scanPrivate)
// sb.AddSelect(filters)
// sb.AddIndexJoin(cols)
// sb.Build(grp)
//
type indexScanBuilder struct {
c *CustomFuncs
f *norm.Factory
mem *memo.Memo
tabID opt.TableID
pkCols opt.ColSet
scanPrivate memo.ScanPrivate
constProjections memo.ProjectionsExpr
innerFilters memo.FiltersExpr
outerFilters memo.FiltersExpr
invertedFilterPrivate memo.InvertedFilterPrivate
indexJoinPrivate memo.IndexJoinPrivate
}

func (b *indexScanBuilder) init(c *CustomFuncs, tabID opt.TableID) {
// Init initializes an indexScanBuilder.
func (b *indexScanBuilder) Init(c *CustomFuncs, tabID opt.TableID) {
// This initialization pattern ensures that fields are not unwittingly
// reused. Field reuse must be explicit.
*b = indexScanBuilder{
Expand All @@ -62,42 +62,46 @@ func (b *indexScanBuilder) init(c *CustomFuncs, tabID opt.TableID) {
}
}

// primaryKeyCols returns the columns from the scanned table's primary index.
func (b *indexScanBuilder) primaryKeyCols() opt.ColSet {
// Ensure that pkCols set is initialized with the primary index columns.
if b.pkCols.Empty() {
primaryIndex := b.c.e.mem.Metadata().Table(b.tabID).Index(cat.PrimaryIndex)
for i, cnt := 0, primaryIndex.KeyColumnCount(); i < cnt; i++ {
b.pkCols.Add(b.tabID.IndexColumnID(primaryIndex, i))
}
// SetScan constructs a standalone Scan expression. As a side effect, it clears
// any expressions added during previous invocations of the builder. SetScan
// makes a copy of scanPrivate so that it doesn't escape.
func (b *indexScanBuilder) SetScan(scanPrivate *memo.ScanPrivate) {
*b = indexScanBuilder{
c: b.c,
f: b.f,
mem: b.mem,
tabID: b.tabID,
scanPrivate: *scanPrivate,
}
return b.pkCols
}

// setScan constructs a standalone Scan expression. As a side effect, it clears
// any expressions added during previous invocations of the builder. setScan
// makes a copy of scanPrivate so that it doesn't escape.
func (b *indexScanBuilder) setScan(scanPrivate *memo.ScanPrivate) {
b.scanPrivate = *scanPrivate
b.innerFilters = nil
b.outerFilters = nil
b.invertedFilterPrivate = memo.InvertedFilterPrivate{}
b.indexJoinPrivate = memo.IndexJoinPrivate{}
// AddConstProjections records the given constant projection expressions so
// that Build wraps the scan in a Project expression producing them. Passing an
// empty proj is a no-op.
//
// It panics with an assertion failure if constant projections were already
// added, or if inner or outer filters were already added.
func (b *indexScanBuilder) AddConstProjections(proj memo.ProjectionsExpr) {
	if len(proj) == 0 {
		// Nothing to project; leave the builder untouched.
		return
	}
	switch {
	case b.hasConstProjections():
		panic(errors.AssertionFailedf("cannot call AddConstProjections twice"))
	case b.hasInnerFilters() || b.hasOuterFilters():
		panic(errors.AssertionFailedf("cannot call AddConstProjections after filters are added"))
	}
	b.constProjections = proj
}

// addInvertedFilter wraps the input expression with an InvertedFilter
// AddInvertedFilter wraps the input expression with an InvertedFilter
// expression having the given span expression.
func (b *indexScanBuilder) addInvertedFilter(
func (b *indexScanBuilder) AddInvertedFilter(
spanExpr *inverted.SpanExpression,
pfState *invertedexpr.PreFiltererStateForInvertedFilterer,
invertedCol opt.ColumnID,
) {
if spanExpr != nil {
if b.invertedFilterPrivate.InvertedColumn != 0 {
panic(errors.AssertionFailedf("cannot call addInvertedFilter twice"))
if b.hasInvertedFilter() {
panic(errors.AssertionFailedf("cannot call AddInvertedFilter twice"))
}
if b.indexJoinPrivate.Table != 0 {
panic(errors.AssertionFailedf("cannot add inverted filter after index join is added"))
if b.hasIndexJoin() {
panic(errors.AssertionFailedf("cannot call AddInvertedFilter after index join is added"))
}
b.invertedFilterPrivate = memo.InvertedFilterPrivate{
InvertedExpression: spanExpr,
Expand All @@ -107,30 +111,30 @@ func (b *indexScanBuilder) addInvertedFilter(
}
}

// addSelect wraps the input expression with a Select expression having the
// AddSelect wraps the input expression with a Select expression having the
// given filter.
func (b *indexScanBuilder) addSelect(filters memo.FiltersExpr) {
func (b *indexScanBuilder) AddSelect(filters memo.FiltersExpr) {
if len(filters) != 0 {
if b.indexJoinPrivate.Table == 0 {
if b.innerFilters != nil {
panic(errors.AssertionFailedf("cannot call addSelect methods twice before index join is added"))
if !b.hasIndexJoin() {
if b.hasInnerFilters() {
panic(errors.AssertionFailedf("cannot call AddSelect methods twice before index join is added"))
}
b.innerFilters = filters
} else {
if b.outerFilters != nil {
panic(errors.AssertionFailedf("cannot call addSelect methods twice after index join is added"))
if b.hasOuterFilters() {
panic(errors.AssertionFailedf("cannot call AddSelect methods twice after index join is added"))
}
b.outerFilters = filters
}
}
}

// addSelectAfterSplit first splits the given filter into two parts: a filter
// AddSelectAfterSplit first splits the given filter into two parts: a filter
// that only involves columns in the given set, and a remaining filter that
// includes everything else. The filter that is bound by the columns becomes a
// Select expression that wraps the input expression, and the remaining filter
// is returned (or nil if there is no remaining filter).
func (b *indexScanBuilder) addSelectAfterSplit(
func (b *indexScanBuilder) AddSelectAfterSplit(
filters memo.FiltersExpr, cols opt.ColSet,
) (remainingFilters memo.FiltersExpr) {
if len(filters) == 0 {
Expand All @@ -139,7 +143,7 @@ func (b *indexScanBuilder) addSelectAfterSplit(

if b.c.FiltersBoundBy(filters, cols) {
// Filter is fully bound by the cols, so add entire filter.
b.addSelect(filters)
b.AddSelect(filters)
return nil
}

Expand All @@ -152,48 +156,63 @@ func (b *indexScanBuilder) addSelectAfterSplit(
}

// Add conditions which are fully bound by the cols and return the rest.
b.addSelect(boundConditions)
b.AddSelect(boundConditions)
return b.c.ExtractUnboundConditions(filters, cols)
}

// addIndexJoin wraps the input expression with an IndexJoin expression that
// AddIndexJoin wraps the input expression with an IndexJoin expression that
// produces the given set of columns by lookup in the primary index.
func (b *indexScanBuilder) addIndexJoin(cols opt.ColSet) {
if b.indexJoinPrivate.Table != 0 {
panic(errors.AssertionFailedf("cannot call addIndexJoin twice"))
func (b *indexScanBuilder) AddIndexJoin(cols opt.ColSet) {
if b.hasIndexJoin() {
panic(errors.AssertionFailedf("cannot call AddIndexJoin twice"))
}
if b.outerFilters != nil {
panic(errors.AssertionFailedf("cannot add index join after an outer filter has been added"))
if b.hasOuterFilters() {
panic(errors.AssertionFailedf("cannot call AddIndexJoin after an outer filter has been added"))
}
b.indexJoinPrivate = memo.IndexJoinPrivate{
Table: b.tabID,
Cols: cols,
}
}

// build constructs the final memo expression by composing together the various
// Build constructs the final memo expression by composing together the various
// expressions that were specified by previous calls to various add methods.
func (b *indexScanBuilder) build(grp memo.RelExpr) {
func (b *indexScanBuilder) Build(grp memo.RelExpr) {
// 1. Only scan.
if len(b.innerFilters) == 0 && b.indexJoinPrivate.Table == 0 {
if !b.hasConstProjections() && !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() {
b.mem.AddScanToGroup(&memo.ScanExpr{ScanPrivate: b.scanPrivate}, grp)
return
}

// 2. Wrap scan in inner filter if it was added.
input := b.f.ConstructScan(&b.scanPrivate)
if len(b.innerFilters) != 0 {
if b.indexJoinPrivate.Table == 0 && b.invertedFilterPrivate.InvertedColumn == 0 {

// 2. Wrap input in a Project if constant projections were added.
if b.hasConstProjections() {
if !b.hasInnerFilters() && !b.hasInvertedFilter() && !b.hasIndexJoin() {
b.mem.AddProjectToGroup(&memo.ProjectExpr{
Input: input,
Projections: b.constProjections,
Passthrough: b.scanPrivate.Cols,
}, grp)
return
}

input = b.f.ConstructProject(input, b.constProjections, b.scanPrivate.Cols)
}

// 3. Wrap input in inner filter if it was added.
if b.hasInnerFilters() {
if !b.hasInvertedFilter() && !b.hasIndexJoin() {
b.mem.AddSelectToGroup(&memo.SelectExpr{Input: input, Filters: b.innerFilters}, grp)
return
}

input = b.f.ConstructSelect(input, b.innerFilters)
}

// 3. Wrap input in inverted filter if it was added.
if b.invertedFilterPrivate.InvertedColumn != 0 {
if b.indexJoinPrivate.Table == 0 {
// 4. Wrap input in inverted filter if it was added.
if b.hasInvertedFilter() {
if !b.hasIndexJoin() {
invertedFilter := &memo.InvertedFilterExpr{
Input: input, InvertedFilterPrivate: b.invertedFilterPrivate,
}
Expand All @@ -204,9 +223,9 @@ func (b *indexScanBuilder) build(grp memo.RelExpr) {
input = b.f.ConstructInvertedFilter(input, &b.invertedFilterPrivate)
}

// 4. Wrap input in index join if it was added.
if b.indexJoinPrivate.Table != 0 {
if len(b.outerFilters) == 0 {
// 5. Wrap input in index join if it was added.
if b.hasIndexJoin() {
if !b.hasOuterFilters() {
indexJoin := &memo.IndexJoinExpr{Input: input, IndexJoinPrivate: b.indexJoinPrivate}
b.mem.AddIndexJoinToGroup(indexJoin, grp)
return
Expand All @@ -215,11 +234,38 @@ func (b *indexScanBuilder) build(grp memo.RelExpr) {
input = b.f.ConstructIndexJoin(input, &b.indexJoinPrivate)
}

// 5. Wrap input in outer filter (which must exist at this point).
if len(b.outerFilters) == 0 {
// indexJoinDef == 0: outerFilters == 0 handled by #1 and #2 above.
// indexJoinDef != 0: outerFilters == 0 handled by #3 above.
// 6. Wrap input in outer filter (which must exist at this point).
if !b.hasOuterFilters() {
// No index join added: a missing outer filter is handled by #1-4 above.
// Index join added: a missing outer filter is handled by #5 above.
panic(errors.AssertionFailedf("outer filter cannot be 0 at this point"))
}
b.mem.AddSelectToGroup(&memo.SelectExpr{Input: input, Filters: b.outerFilters}, grp)
}

// hasConstProjections reports whether the builder holds at least one constant
// projection expression.
func (b *indexScanBuilder) hasConstProjections() bool {
	return len(b.constProjections) > 0
}

// hasInnerFilters reports whether the builder holds at least one inner filter.
func (b *indexScanBuilder) hasInnerFilters() bool {
	return len(b.innerFilters) > 0
}

// hasOuterFilters reports whether the builder holds at least one outer filter.
func (b *indexScanBuilder) hasOuterFilters() bool {
	return len(b.outerFilters) > 0
}

// hasInvertedFilter reports whether an inverted filter has been added to the
// builder, which is detected by a non-zero inverted column ID.
func (b *indexScanBuilder) hasInvertedFilter() bool {
	invertedCol := b.invertedFilterPrivate.InvertedColumn
	return invertedCol != 0
}

// hasIndexJoin reports whether an index join has been added to the builder,
// which is detected by a non-zero table ID in the index join private.
func (b *indexScanBuilder) hasIndexJoin() bool {
	joinTable := b.indexJoinPrivate.Table
	return joinTable != 0
}
14 changes: 9 additions & 5 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ func (c *CustomFuncs) GenerateLookupJoins(

var pkCols opt.ColList
var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes)
iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectInvertedIndexes)
iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool, constProj memo.ProjectionsExpr) {
// Find the longest prefix of index key columns that are constrained by
// an equality with another column or a constant.
numIndexKeyCols := index.LaxKeyColumnCount()
Expand Down Expand Up @@ -364,7 +364,11 @@ func (c *CustomFuncs) GenerateLookupJoins(
lookupJoin.Cols = lookupJoin.LookupExpr.OuterCols()
lookupJoin.Cols.UnionWith(inputProps.OutputCols)

if isCovering {
// TODO(mgartner): The right side of the join can "produce" columns held
// constant by a partial index predicate, but the lookup joiner does not
// currently support this. For now, if constProj is non-empty we
// consider the index non-covering.
if isCovering && len(constProj) == 0 {
// Case 1 (see function comment).
lookupJoin.Cols.UnionWith(scanPrivate.Cols)

Expand Down Expand Up @@ -639,8 +643,8 @@ func (c *CustomFuncs) GenerateInvertedJoins(
var optionalFilters memo.FiltersExpr

var iter scanIndexIter
iter.Init(c.e.mem, &c.im, scanPrivate, on, rejectNonInvertedIndexes)
iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, isCovering bool) {
iter.Init(c.e.evalCtx, c.e.f, c.e.mem, &c.im, scanPrivate, on, rejectNonInvertedIndexes)
iter.ForEach(func(index cat.Index, onFilters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) {
invertedJoin := memo.InvertedJoinExpr{Input: input}
numPrefixCols := index.NonInvertedPrefixColumnCount()

Expand Down
Loading

0 comments on commit 3cfe2a3

Please sign in to comment.