diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go index d9d0e5e700ba..40d2a8c94bc4 100644 --- a/pkg/sql/opt/norm/custom_funcs.go +++ b/pkg/sql/opt/norm/custom_funcs.go @@ -1431,7 +1431,8 @@ func (c *CustomFuncs) CanMapOnSetOp(src *memo.FiltersItem) bool { func (c *CustomFuncs) MapSetOpFilterLeft( filter *memo.FiltersItem, set *memo.SetPrivate, ) opt.ScalarExpr { - return c.mapSetOpFilter(filter, set.OutCols, set.LeftCols) + colMap := makeMapFromColLists(set.OutCols, set.LeftCols) + return c.MapFiltersItemCols(filter, colMap) } // MapSetOpFilterRight maps the filter onto the right expression by replacing @@ -1441,34 +1442,32 @@ func (c *CustomFuncs) MapSetOpFilterLeft( func (c *CustomFuncs) MapSetOpFilterRight( filter *memo.FiltersItem, set *memo.SetPrivate, ) opt.ScalarExpr { - return c.mapSetOpFilter(filter, set.OutCols, set.RightCols) + colMap := makeMapFromColLists(set.OutCols, set.RightCols) + return c.MapFiltersItemCols(filter, colMap) } -// mapSetOpFilter maps filter expressions to dst by replacing occurrences of -// columns in src with corresponding columns in dst (the two lists must be of -// equal length). -// -// For each column in src that is not an outer column, SetMap replaces it with -// the corresponding column in dst. -// -// For example, consider this query: -// -// SELECT * FROM (SELECT x FROM a UNION SELECT y FROM b) WHERE x < 5 -// -// If mapSetOpFilter is called on the left subtree of the Union, the filter -// x < 5 propagates to that side after mapping the column IDs appropriately. -// WLOG, If setMap is called on the right subtree, the filter x < 5 will be -// mapped similarly to y < 5 on the right side. -func (c *CustomFuncs) mapSetOpFilter( - filter *memo.FiltersItem, src opt.ColList, dst opt.ColList, -) opt.ScalarExpr { - // Map each column in src to one column in dst to map the - // filters appropriately. +// makeMapFromColLists maps each column ID in src to a column ID in dst. 
The +// column IDs are mapped based on their relative positions in the column lists, +// e.g. the third item in src maps to the third item in dst. The lists must be +// of equal length. +func makeMapFromColLists(src opt.ColList, dst opt.ColList) util.FastIntMap { + if len(src) != len(dst) { + panic(errors.AssertionFailedf("src and dst must have the same length, src: %v, dst: %v", src, dst)) + } + var colMap util.FastIntMap for colIndex, outColID := range src { colMap.Set(int(outColID), int(dst[colIndex])) } + return colMap +} +// MapFiltersItemCols maps filter expressions by replacing occurrences of +// the keys of colMap with the corresponding values. Outer columns are not +// replaced. +func (c *CustomFuncs) MapFiltersItemCols( + filter *memo.FiltersItem, colMap util.FastIntMap, +) opt.ScalarExpr { // Recursively walk the scalar sub-tree looking for references to columns // that need to be replaced and then replace them appropriately. var replace ReplaceFunc diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go index dcc6cc76be25..0e3ee961c8b8 100644 --- a/pkg/sql/opt/xform/custom_funcs.go +++ b/pkg/sql/opt/xform/custom_funcs.go @@ -2268,6 +2268,99 @@ func (c *CustomFuncs) MakeOrderingChoiceFromColumn( return oc } +// DuplicateScanPrivate constructs a new ScanPrivate that is identical to the +// input, but has new table and column IDs. +// +// DuplicateScanPrivate can only be called on canonical ScanPrivates because not +// all scan properties are copied to the new ScanPrivate, e.g. constraints. 
+func (c *CustomFuncs) DuplicateScanPrivate(sp *memo.ScanPrivate) *memo.ScanPrivate { + if !c.IsCanonicalScan(sp) { + panic(errors.AssertionFailedf("input ScanPrivate must be canonical: %v", sp)) + } + + md := c.e.mem.Metadata() + tabMeta := md.TableMeta(sp.Table) + dupTabID := md.AddTable(tabMeta.Table, &tabMeta.Alias) + + var dupTabColIDs opt.ColSet + cols := sp.Cols + for i, ok := cols.Next(0); ok; i, ok = cols.Next(i + 1) { + ord := tabMeta.MetaID.ColumnOrdinal(i) + dupColID := dupTabID.ColumnID(ord) + dupTabColIDs.Add(dupColID) + } + + return &memo.ScanPrivate{ + Table: dupTabID, + Cols: dupTabColIDs, + Flags: sp.Flags, + Locking: sp.Locking, + } +} + +// MapScanFilterCols returns a new FiltersExpr with all the src column IDs in +// the input expression replaced with column IDs in dst. +// +// NOTE: Every ColumnID in src must map to a ColumnID in dst with the same +// relative position in the ColSets. For example, if src and dst are (1, 5, 6) +// and (7, 12, 15), then the following mapping would be applied: +// +// 1 => 7 +// 5 => 12 +// 6 => 15 +func (c *CustomFuncs) MapScanFilterCols( + filters memo.FiltersExpr, src *memo.ScanPrivate, dst *memo.ScanPrivate, +) memo.FiltersExpr { + if src.Cols.Len() != dst.Cols.Len() { + panic(errors.AssertionFailedf( + "src and dst must have the same number of columns, src.Cols: %v, dst.Cols: %v", + src.Cols, + dst.Cols, + )) + } + + // Map each column in src to a column in dst based on the relative position + // of both the src and dst ColumnIDs in the ColSet. + var colMap util.FastIntMap + dstCol, _ := dst.Cols.Next(0) + for srcCol, ok := src.Cols.Next(0); ok; srcCol, ok = src.Cols.Next(srcCol + 1) { + colMap.Set(int(srcCol), int(dstCol)) + dstCol, _ = dst.Cols.Next(dstCol + 1) + } + + // Map the columns of each filter in the FiltersExpr. 
+ newFilters := make([]memo.FiltersItem, 0, len(filters)) + for i := range filters { + expr := c.MapFiltersItemCols(&filters[i], colMap) + newFilters = append(newFilters, c.e.f.ConstructFiltersItem(expr)) + } + + return newFilters +} + +// ExprOuterCols returns the outer columns of the given expression. +// +// Note that ExprOuterCols traverses the Expr tree rather than returning the +// ColSet from cached shared properties. This is because shared properties are +// not cached for all Expr types. +func (c *CustomFuncs) ExprOuterCols(expr opt.Expr) opt.ColSet { + var p props.Shared + memo.BuildSharedProps(expr, &p) + return p.OuterCols +} + +// MakeSetPrivateForUnionSelects constructs a new SetPrivate with column sets +// from the left, right, and output of the operation. +func (c *CustomFuncs) MakeSetPrivateForUnionSelects( + left, right, out *memo.ScanPrivate, +) *memo.SetPrivate { + return &memo.SetPrivate{ + LeftCols: opt.ColSetToList(left.Cols), + RightCols: opt.ColSetToList(right.Cols), + OutCols: opt.ColSetToList(out.Cols), + } +} + // scanIndexIter is a helper struct that supports iteration over the indexes // of a Scan operator table. For example: // diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt index 6daed913891c..224a055af5ce 100644 --- a/pkg/sql/opt/xform/rules/select.opt +++ b/pkg/sql/opt/xform/rules/select.opt @@ -12,8 +12,8 @@ # and examples. [GenerateConstrainedScans, Explore] (Select - (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate)) - $filters:* + (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate)) + $filters:* ) => (GenerateConstrainedScans $scanPrivate $filters) @@ -22,8 +22,73 @@ # be serviced by an inverted index. 
[GenerateInvertedIndexScans, Explore] (Select - (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate)) - $filters:* + (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate)) + $filters:* ) => (GenerateInvertedIndexScans $scanPrivate $filters) + +# GenerateUnionSelects splits disjunctions (Or expressions) into a Union of two +# Select expressions, the first containing the left sub-expression of the Or +# expression and the second containing the right sub-expression. All other +# filter items in the original expression are preserved in the new Select +# expressions. +# +# This can produce better query plans in cases where indexes cover both sides of +# the Or expression. The execution plan can use both indexes to satisfy both +# sides of the disjunction and union the results together. +# +# Note that this rule only matches Selects with canonical scans. Therefore scan +# constraints do not need to be duplicated in the left and right scans of the +# union. +# +# Also note that this rule only matches Selects that have strict keys. This is +# required to prevent the generated Union from de-duplicating rows that have +# the same selected values. For example, consider the following: +# +# CREATE TABLE t (k INT PRIMARY KEY, a INT, b INT) +# INSERT INTO t VALUES (1, 1, 3) +# INSERT INTO t VALUES (2, 1, 3) +# +# The expected result of the following Select query is 2 rows, with values +# (1, 3). +# +# SELECT a, b FROM t WHERE a = 1 OR b = 3 +# +# However, Union de-duplicates all tuples with the same set of values. So, the +# query below returns only a single row. +# +# SELECT a, b FROM t WHERE a = 1 +# UNION +# SELECT a, b FROM t WHERE b = 3 +# +# With a key in the output columns, each input row to the Union is guaranteed to +# be unique, and therefore will not be incorrectly de-duplicated. 
+[GenerateUnionSelects, Explore] +(Select + $input:(Scan + $scanPrivate:* & (IsCanonicalScan $scanPrivate) + ) & (HasStrictKey $input) + $filters:[ + ... + $item:(FiltersItem (Or $left:* $right:*)) & + ^(ColsAreEqual (ExprOuterCols $left) (ExprOuterCols $right)) + ... + ] +) +=> +(Union + (Select + $input + (ReplaceFiltersItem $filters $item $left) + ) + (Select + (Scan $rightScan:(DuplicateScanPrivate $scanPrivate)) + (MapScanFilterCols + (ReplaceFiltersItem $filters $item $right) + $scanPrivate + $rightScan + ) + ) + (MakeSetPrivateForUnionSelects $scanPrivate $rightScan $scanPrivate) +) diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index df625d8a68c3..954dbee73daf 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -31,6 +31,18 @@ CREATE TABLE c ) ---- +exec-ddl +CREATE TABLE d +( + k INT PRIMARY KEY, + u INT, + v INT, + w INT, + INDEX u(u), + INDEX v(v) +) +---- + # -------------------------------------------------- # GenerateConstrainedScans # -------------------------------------------------- @@ -1008,3 +1020,345 @@ project │ └── fd: (1)-->(2) └── filters └── a:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)] + + +# -------------------------------------------------- +# GenerateUnionSelects +# -------------------------------------------------- + +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE u = 1 OR v = 1 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: k:1!null u:2!null v:3 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3) + │ └── scan d@u + │ ├── columns: k:1!null u:2!null + │ ├── constraint: /2/1: [/1 - /1] + │ ├── key: (1) + │ └── fd: ()-->(2) + └── index-join d + ├── columns: k:5!null u:6 v:7!null + ├── key: (5) + ├── fd: ()-->(7), 
(5)-->(6) + └── scan d@v + ├── columns: k:5!null v:7!null + ├── constraint: /7/5: [/1 - /1] + ├── key: (5) + └── fd: ()-->(7) + +opt expect=GenerateUnionSelects +SELECT * FROM d WHERE w = 1 AND (u = 1 OR v = 1) +---- +union + ├── columns: k:1!null u:2 v:3 w:4!null + ├── left columns: k:1!null u:2 v:3 w:4!null + ├── right columns: k:5 u:6 v:7 w:8 + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + ├── select + │ ├── columns: k:1!null u:2!null v:3 w:4!null + │ ├── key: (1) + │ ├── fd: ()-->(2,4), (1)-->(3) + │ ├── index-join d + │ │ ├── columns: k:1!null u:2 v:3 w:4 + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3,4) + │ │ └── scan d@u + │ │ ├── columns: k:1!null u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── filters + │ └── w:4 = 1 [outer=(4), constraints=(/4: [/1 - /1]; tight), fd=()-->(4)] + └── select + ├── columns: k:5!null u:6 v:7!null w:8!null + ├── key: (5) + ├── fd: ()-->(7,8), (5)-->(6) + ├── index-join d + │ ├── columns: k:5!null u:6 v:7 w:8 + │ ├── key: (5) + │ ├── fd: ()-->(7), (5)-->(6,8) + │ └── scan d@v + │ ├── columns: k:5!null v:7!null + │ ├── constraint: /7/5: [/1 - /1] + │ ├── key: (5) + │ └── fd: ()-->(7) + └── filters + └── w:8 = 1 [outer=(8), constraints=(/8: [/1 - /1]; tight), fd=()-->(8)] + +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 2) AND (u = 10 OR v = 20) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── inner-join (zigzag d@u d@v) + │ ├── columns: k:1!null u:2!null v:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [1] + │ ├── right fixed columns: [3] = [20] + │ ├── key: (1) + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── u:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + │ └── v:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] + └── inner-join 
(zigzag d@u d@v) + ├── columns: k:5!null u:6!null v:7!null + ├── eq columns: [5] = [5] + ├── left fixed columns: [6] = [10] + ├── right fixed columns: [7] = [2] + ├── key: (5) + ├── fd: ()-->(6,7) + └── filters + ├── v:7 = 2 [outer=(7), constraints=(/7: [/2 - /2]; tight), fd=()-->(7)] + └── u:6 = 10 [outer=(6), constraints=(/6: [/10 - /10]; tight), fd=()-->(6)] + +# Don't expand INs to many ORs. +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE u IN (1, 2, 3, 4) OR v IN (5, 6, 7, 8) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: k:1!null u:2!null v:3 + │ ├── key: (1) + │ ├── fd: (1)-->(2,3) + │ └── scan d@u + │ ├── columns: k:1!null u:2!null + │ ├── constraint: /2/1: [/1 - /4] + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── index-join d + ├── columns: k:5!null u:6 v:7!null + ├── key: (5) + ├── fd: (5)-->(6,7) + └── scan d@v + ├── columns: k:5!null v:7!null + ├── constraint: /7/5: [/5 - /8] + ├── key: (5) + └── fd: (5)-->(7) + +# Uncorrelated subquery. 
+opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 1) AND EXISTS (SELECT u, v FROM a) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: d.k:1!null d.u:2 d.v:3 + ├── left columns: d.k:1!null d.u:2 d.v:3 + ├── right columns: d.k:8 d.u:9 d.v:10 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: d.k:1!null d.u:2!null d.v:3 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3) + │ └── select + │ ├── columns: d.k:1!null d.u:2!null + │ ├── key: (1) + │ ├── fd: ()-->(2) + │ ├── scan d@u + │ │ ├── columns: d.k:1!null d.u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── filters + │ └── exists [subquery] + │ └── scan a + │ ├── columns: a.u:6 a.v:7 + │ ├── limit: 1 + │ ├── key: () + │ └── fd: ()-->(6,7) + └── index-join d + ├── columns: d.k:8!null d.u:9 d.v:10!null + ├── key: (8) + ├── fd: ()-->(10), (8)-->(9) + └── select + ├── columns: d.k:8!null d.v:10!null + ├── key: (8) + ├── fd: ()-->(10) + ├── scan d@v + │ ├── columns: d.k:8!null d.v:10!null + │ ├── constraint: /10/8: [/1 - /1] + │ ├── key: (8) + │ └── fd: ()-->(10) + └── filters + └── exists [subquery] + └── scan a + ├── columns: a.u:6 a.v:7 + ├── limit: 1 + ├── key: () + └── fd: ()-->(6,7) + +# Correlated subquery. 
+opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 1) AND EXISTS (SELECT * FROM a WHERE a.u = d.u) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── project + ├── columns: d.k:1!null d.u:2 d.v:3 + ├── key: (1) + ├── fd: (1)-->(2,3) + └── inner-join (hash) + ├── columns: d.k:1!null d.u:2!null d.v:3 a.u:6!null + ├── key: (1) + ├── fd: (1)-->(2,3), (2)==(6), (6)==(2) + ├── union + │ ├── columns: d.k:1!null d.u:2 d.v:3 + │ ├── left columns: d.k:1!null d.u:2 d.v:3 + │ ├── right columns: d.k:8 d.u:9 d.v:10 + │ ├── key: (1) + │ ├── fd: (1)-->(2,3) + │ ├── index-join d + │ │ ├── columns: d.k:1!null d.u:2!null d.v:3 + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3) + │ │ └── scan d@u + │ │ ├── columns: d.k:1!null d.u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── index-join d + │ ├── columns: d.k:8!null d.u:9 d.v:10!null + │ ├── key: (8) + │ ├── fd: ()-->(10), (8)-->(9) + │ └── scan d@v + │ ├── columns: d.k:8!null d.v:10!null + │ ├── constraint: /10/8: [/1 - /1] + │ ├── key: (8) + │ └── fd: ()-->(10) + ├── distinct-on + │ ├── columns: a.u:6 + │ ├── grouping columns: a.u:6 + │ ├── internal-ordering: +6 + │ ├── key: (6) + │ └── scan a@u + │ ├── columns: a.u:6 + │ └── ordering: +6 + └── filters + └── a.u:6 = d.u:2 [outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] + +# Correlated subquery with references to outer columns not in the scan columns. 
+opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 1) AND EXISTS (SELECT * FROM a WHERE a.u = d.w) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── project + ├── columns: d.k:1!null d.u:2 d.v:3 w:4 + ├── key: (1) + ├── fd: (1)-->(2-4) + └── inner-join (hash) + ├── columns: d.k:1!null d.u:2 d.v:3 w:4!null a.u:6!null + ├── key: (1) + ├── fd: (1)-->(2-4), (4)==(6), (6)==(4) + ├── union + │ ├── columns: d.k:1!null d.u:2 d.v:3 w:4 + │ ├── left columns: d.k:1!null d.u:2 d.v:3 w:4 + │ ├── right columns: d.k:8 d.u:9 d.v:10 w:11 + │ ├── key: (1) + │ ├── fd: (1)-->(2-4) + │ ├── index-join d + │ │ ├── columns: d.k:1!null d.u:2!null d.v:3 w:4 + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3,4) + │ │ └── scan d@u + │ │ ├── columns: d.k:1!null d.u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── index-join d + │ ├── columns: d.k:8!null d.u:9 d.v:10!null w:11 + │ ├── key: (8) + │ ├── fd: ()-->(10), (8)-->(9,11) + │ └── scan d@v + │ ├── columns: d.k:8!null d.v:10!null + │ ├── constraint: /10/8: [/1 - /1] + │ ├── key: (8) + │ └── fd: ()-->(10) + ├── distinct-on + │ ├── columns: a.u:6 + │ ├── grouping columns: a.u:6 + │ ├── internal-ordering: +6 + │ ├── key: (6) + │ └── scan a@u + │ ├── columns: a.u:6 + │ └── ordering: +6 + └── filters + └── a.u:6 = w:4 [outer=(4,6), constraints=(/4: (/NULL - ]; /6: (/NULL - ]), fd=(4)==(6), (6)==(4)] + +# Don't apply to queries without strict keys. +opt expect-not=GenerateUnionSelects +SELECT u, v FROM d WHERE u = 1 OR v = 1 +---- +select + ├── columns: u:2 v:3 + ├── scan d + │ └── columns: u:2 v:3 + └── filters + └── (u:2 = 1) OR (v:3 = 1) [outer=(2,3)] + +# Don't apply to disjunctions with identical colsets on the left and right. 
+opt expect-not=GenerateUnionSelects +SELECT k FROM d WHERE u = 1 OR u = 5 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── scan d@u + ├── columns: k:1!null u:2!null + ├── constraint: /2/1 + │ ├── [/1 - /1] + │ └── [/5 - /5] + ├── key: (1) + └── fd: (1)-->(2) + +# Verifies that flags are copied to the duplicated scan. By forcing a single +# index, the generated expression with a UNION has a higher cost and is not +# part of the final expression. +opt +SELECT k FROM d@primary WHERE u = 1 OR v = 1 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── select + ├── columns: k:1!null u:2 v:3 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── scan d + │ ├── columns: k:1!null u:2 v:3 + │ ├── flags: force-index=primary + │ ├── key: (1) + │ └── fd: (1)-->(2,3) + └── filters + └── (u:2 = 1) OR (v:3 = 1) [outer=(2,3)]