From 0a7faf0b0d83bb6aa97559f0c7ecd5fee59b72fe Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Fri, 27 Mar 2020 13:23:54 -0700 Subject: [PATCH] opt: add GenerateUnionSelects exploration rule for disjunction This commit adds a new exploration rule that can produce better query plans for disjunctions (e.g. a = 1 OR b = 2). The rule transforms some Select + Scan expressions with a disjunction filter into a Union of two Select expressions, each with one side of the disjuction as a filter. This can result in faster query plans in cases where two indexes cover each side of the disjunction. This rule only applies for Scan expressions that contain a strict key. Fixes #2142 Release note (performance improvement): The query optimizer now produces faster query plans for some disjunctions (OR expressions) by utilizing multiple indexes. --- pkg/sql/opt/xform/custom_funcs.go | 94 ++++++++ pkg/sql/opt/xform/rules/select.opt | 50 ++++- pkg/sql/opt/xform/testdata/rules/select | 283 ++++++++++++++++++++++++ 3 files changed, 423 insertions(+), 4 deletions(-) diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go index dcc6cc76be25..1a5a56a6092e 100644 --- a/pkg/sql/opt/xform/custom_funcs.go +++ b/pkg/sql/opt/xform/custom_funcs.go @@ -2268,6 +2268,100 @@ func (c *CustomFuncs) MakeOrderingChoiceFromColumn( return oc } +// DuplicateScan constructs a new ScanPrivate that is identical to the input, +// but has new table and column IDs. +// +// DuplicateScan assumes it is being called on a canonical scan. Non-canonical +// scan properties like constraints are not copied to the new ScanPrivate. +func (c *CustomFuncs) DuplicateScan(sp *memo.ScanPrivate) *memo.ScanPrivate { + md := c.e.mem.Metadata() + tabMD := md.TableMeta(sp.Table) + dupTabID := md.AddTable(tabMD.Table, &tabMD.Alias) + + var dupTabColIDs opt.ColSet + cols := sp.Cols + for i, ok := cols.Next(0); ok; i, ok = cols.Next(i + 1) { + ord := tabMD.MetaID.ColumnOrdinal(i) + dupColID := dupTabID.ColumnID(ord) + dupTabColIDs.Add(dupColID) + } + + return &memo.ScanPrivate{ + Table: dupTabID, + Cols: dupTabColIDs, + } +} + +// MapFilterCols returns a new FiltersExpr with all the src column IDs in the input +// expression replaced with column IDs in dst. +func (c *CustomFuncs) MapFilterCols( + filters memo.FiltersExpr, src, dst *memo.ScanPrivate, +) memo.FiltersExpr { + srcCols := opt.ColSetToList(src.Cols) + dstCols := opt.ColSetToList(dst.Cols) + + // Map each column in src to a column in dst. + var colMap util.FastIntMap + for i, srcCol := range srcCols { + colMap.Set(int(srcCol), int(dstCols[i])) + } + + // Map the columns of each filter in the FilterExpr. + newFilters := make([]memo.FiltersItem, 0, len(filters)) + for _, f := range filters { + expr := c.mapFiltersItemCols(f, colMap) + newFilters = append(newFilters, c.e.f.ConstructFiltersItem(expr)) + } + + return newFilters +} + +// mapFiltersItemCols replaces the column ID of each VariableExpr in the input +// expression with new column IDs. The function recursively traverses the +// children of the expression tree looking for columns that need to be replaced. +// Each occurrence of the keys in colMap will be replaced with their +// corresponding values in colMap. +func (c *CustomFuncs) mapFiltersItemCols( + filter memo.FiltersItem, colMap util.FastIntMap, +) opt.ScalarExpr { + var replace norm.ReplaceFunc + replace = func(nd opt.Expr) opt.Expr { + switch t := nd.(type) { + case *memo.VariableExpr: + dstCol, ok := colMap.Get(int(t.Col)) + if !ok { + // TODO: Is this case even possible? + return nd + } + return c.e.f.ConstructVariable(opt.ColumnID(dstCol)) + } + return c.e.f.Replace(nd, replace) + } + + return replace(filter.Condition).(opt.ScalarExpr) +} + +// ExprOuterCols returns the outer columns of the given expression. +// +// Note that ExprOuterCols traverses the Expr tree rather than returning the +// ColSet from cached shared properties. This is because shared properties are +// not cached for all Expr types. +func (c *CustomFuncs) ExprOuterCols(expr opt.Expr) opt.ColSet { + var p props.Shared + memo.BuildSharedProps(expr, &p) + return p.OuterCols +} + +// ConstructSetPrivate constructs a new SetPrivate with column sets from the +// left, right, and output of the operation. +func (c *CustomFuncs) ConstructSetPrivate(left, right, out *memo.ScanPrivate) *memo.SetPrivate { + return &memo.SetPrivate{ + LeftCols: opt.ColSetToList(left.Cols), + RightCols: opt.ColSetToList(right.Cols), + OutCols: opt.ColSetToList(out.Cols), + } +} + // scanIndexIter is a helper struct that supports iteration over the indexes // of a Scan operator table. For example: // diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt index 6daed913891c..a508387bbf9b 100644 --- a/pkg/sql/opt/xform/rules/select.opt +++ b/pkg/sql/opt/xform/rules/select.opt @@ -12,8 +12,8 @@ # and examples. [GenerateConstrainedScans, Explore] (Select - (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate)) - $filters:* + (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate)) + $filters:* ) => (GenerateConstrainedScans $scanPrivate $filters) @@ -22,8 +22,50 @@ # be serviced by an inverted index. [GenerateInvertedIndexScans, Explore] (Select - (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate)) - $filters:* + (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate)) + $filters:* ) => (GenerateInvertedIndexScans $scanPrivate $filters) + +# GenerateUnionSelects splits disjunctions (Or expressions) into a Union of two +# Select expressions, the first containing the left sub-expression of the Or +# expression and the second containing the right sub-expression. All other +# filter items in the original expression are preserved in the new Select +# expressions. +# +# This can produce better query plans in cases where indexes cover both sides of +# the Or expression. The execution plan can use both indexes to satisfy both +# sides of the disjunction and union the results together. +# +# Note that this rule only matches Selects with canonical scans. Therefore scan +# constraints do not need to be duplicated in the left and right scans of the +# union. +# +# Also note that this rule only matches Selects that have strict keys. Without +# a strict key, the Union expression would deduplicate rows with equal values +# but differing keys, leading to incorrect results. +[GenerateUnionSelects, Explore] +(Select + $input:(Scan + $scanPrivate:* & (IsCanonicalScan $scanPrivate) + ) & (HasStrictKey $input) + $filters:[ + ... + $item:(FiltersItem (Or $left:* $right:*)) & + ^(ColsAreEqual (ExprOuterCols $left) (ExprOuterCols $right)) + ... + ] +) +=> +(Union + (Select + $input + (ReplaceFiltersItem $filters $item $left) + ) + (Select + (Scan $rightScan:(DuplicateScan $scanPrivate)) + (MapFilterCols (ReplaceFiltersItem $filters $item $right) $scanPrivate $rightScan) + ) + (ConstructSetPrivate $scanPrivate $rightScan $scanPrivate) +) diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index df625d8a68c3..c704ceabe351 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -31,6 +31,18 @@ CREATE TABLE c ) ---- +exec-ddl +CREATE TABLE d +( + k INT PRIMARY KEY, + u INT, + v INT, + w INT, + INDEX u(u), + INDEX v(v) +) +---- + # -------------------------------------------------- # GenerateConstrainedScans # -------------------------------------------------- @@ -1008,3 +1020,274 @@ project │ └── fd: (1)-->(2) └── filters └── a:2 IS NULL [outer=(2), constraints=(/2: [/NULL - /NULL]; tight), fd=()-->(2)] + + +# -------------------------------------------------- +# GenerateUnionSelects +# -------------------------------------------------- + +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE u = 1 OR v = 1 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: k:1!null u:2!null v:3 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3) + │ └── scan d@u + │ ├── columns: k:1!null u:2!null + │ ├── constraint: /2/1: [/1 - /1] + │ ├── key: (1) + │ └── fd: ()-->(2) + └── index-join d + ├── columns: k:5!null u:6 v:7!null + ├── key: (5) + ├── fd: ()-->(7), (5)-->(6) + └── scan d@v + ├── columns: k:5!null v:7!null + ├── constraint: /7/5: [/1 - /1] + ├── key: (5) + └── fd: ()-->(7) + +opt expect=GenerateUnionSelects +SELECT * FROM d WHERE w = 1 AND (u = 1 OR v = 1) +---- +union + ├── columns: k:1!null u:2 v:3 w:4!null + ├── left columns: k:1!null u:2 v:3 w:4!null + ├── right columns: k:5 u:6 v:7 w:8 + ├── key: (1) + ├── fd: ()-->(4), (1)-->(2,3) + ├── select + │ ├── columns: k:1!null u:2!null v:3 w:4!null + │ ├── key: (1) + │ ├── fd: ()-->(2,4), (1)-->(3) + │ ├── index-join d + │ │ ├── columns: k:1!null u:2 v:3 w:4 + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3,4) + │ │ └── scan d@u + │ │ ├── columns: k:1!null u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── filters + │ └── w:4 = 1 [outer=(4), constraints=(/4: [/1 - /1]; tight), fd=()-->(4)] + └── select + ├── columns: k:5!null u:6 v:7!null w:8!null + ├── key: (5) + ├── fd: ()-->(7,8), (5)-->(6) + ├── index-join d + │ ├── columns: k:5!null u:6 v:7 w:8 + │ ├── key: (5) + │ ├── fd: ()-->(7), (5)-->(6,8) + │ └── scan d@v + │ ├── columns: k:5!null v:7!null + │ ├── constraint: /7/5: [/1 - /1] + │ ├── key: (5) + │ └── fd: ()-->(7) + └── filters + └── w:8 = 1 [outer=(8), constraints=(/8: [/1 - /1]; tight), fd=()-->(8)] + +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 2) AND (u = 10 OR v = 20) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── inner-join (zigzag d@u d@v) + │ ├── columns: k:1!null u:2!null v:3!null + │ ├── eq columns: [1] = [1] + │ ├── left fixed columns: [2] = [1] + │ ├── right fixed columns: [3] = [20] + │ ├── key: (1) + │ ├── fd: ()-->(2,3) + │ └── filters + │ ├── u:2 = 1 [outer=(2), constraints=(/2: [/1 - /1]; tight), fd=()-->(2)] + │ └── v:3 = 20 [outer=(3), constraints=(/3: [/20 - /20]; tight), fd=()-->(3)] + └── inner-join (zigzag d@u d@v) + ├── columns: k:5!null u:6!null v:7!null + ├── eq columns: [5] = [5] + ├── left fixed columns: [6] = [10] + ├── right fixed columns: [7] = [2] + ├── key: (5) + ├── fd: ()-->(6,7) + └── filters + ├── v:7 = 2 [outer=(7), constraints=(/7: [/2 - /2]; tight), fd=()-->(7)] + └── u:6 = 10 [outer=(6), constraints=(/6: [/10 - /10]; tight), fd=()-->(6)] + +# Don't expand INs to many ORs. +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE u IN (1, 2, 3, 4) OR v IN (5, 6, 7, 8) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: k:1!null u:2 v:3 + ├── left columns: k:1!null u:2 v:3 + ├── right columns: k:5 u:6 v:7 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: k:1!null u:2!null v:3 + │ ├── key: (1) + │ ├── fd: (1)-->(2,3) + │ └── scan d@u + │ ├── columns: k:1!null u:2!null + │ ├── constraint: /2/1: [/1 - /4] + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── index-join d + ├── columns: k:5!null u:6 v:7!null + ├── key: (5) + ├── fd: (5)-->(6,7) + └── scan d@v + ├── columns: k:5!null v:7!null + ├── constraint: /7/5: [/5 - /8] + ├── key: (5) + └── fd: (5)-->(7) + +# Uncorrelated subquery. +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 1) AND EXISTS (SELECT * FROM a) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── union + ├── columns: d.k:1!null d.u:2 d.v:3 + ├── left columns: d.k:1!null d.u:2 d.v:3 + ├── right columns: d.k:8 d.u:9 d.v:10 + ├── key: (1) + ├── fd: (1)-->(2,3) + ├── index-join d + │ ├── columns: d.k:1!null d.u:2!null d.v:3 + │ ├── key: (1) + │ ├── fd: ()-->(2), (1)-->(3) + │ └── select + │ ├── columns: d.k:1!null d.u:2!null + │ ├── key: (1) + │ ├── fd: ()-->(2) + │ ├── scan d@u + │ │ ├── columns: d.k:1!null d.u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── filters + │ └── exists [subquery] + │ └── scan a + │ ├── columns: a.k:5!null a.u:6 a.v:7 + │ ├── limit: 1 + │ ├── key: () + │ └── fd: ()-->(5-7) + └── index-join d + ├── columns: d.k:8!null d.u:9 d.v:10!null + ├── key: (8) + ├── fd: ()-->(10), (8)-->(9) + └── select + ├── columns: d.k:8!null d.v:10!null + ├── key: (8) + ├── fd: ()-->(10) + ├── scan d@v + │ ├── columns: d.k:8!null d.v:10!null + │ ├── constraint: /10/8: [/1 - /1] + │ ├── key: (8) + │ └── fd: ()-->(10) + └── filters + └── exists [subquery] + └── scan a + ├── columns: a.k:5!null a.u:6 a.v:7 + ├── limit: 1 + ├── key: () + └── fd: ()-->(5-7) + +# Correlated subquery. +opt expect=GenerateUnionSelects +SELECT k FROM d WHERE (u = 1 OR v = 1) AND EXISTS (SELECT * FROM a WHERE a.u = d.u) +---- +project + ├── columns: k:1!null + ├── key: (1) + └── project + ├── columns: d.k:1!null d.u:2 d.v:3 + ├── key: (1) + ├── fd: (1)-->(2,3) + └── inner-join (hash) + ├── columns: d.k:1!null d.u:2!null d.v:3 a.u:6!null + ├── key: (1) + ├── fd: (1)-->(2,3), (2)==(6), (6)==(2) + ├── union + │ ├── columns: d.k:1!null d.u:2 d.v:3 + │ ├── left columns: d.k:1!null d.u:2 d.v:3 + │ ├── right columns: d.k:8 d.u:9 d.v:10 + │ ├── key: (1) + │ ├── fd: (1)-->(2,3) + │ ├── index-join d + │ │ ├── columns: d.k:1!null d.u:2!null d.v:3 + │ │ ├── key: (1) + │ │ ├── fd: ()-->(2), (1)-->(3) + │ │ └── scan d@u + │ │ ├── columns: d.k:1!null d.u:2!null + │ │ ├── constraint: /2/1: [/1 - /1] + │ │ ├── key: (1) + │ │ └── fd: ()-->(2) + │ └── index-join d + │ ├── columns: d.k:8!null d.u:9 d.v:10!null + │ ├── key: (8) + │ ├── fd: ()-->(10), (8)-->(9) + │ └── scan d@v + │ ├── columns: d.k:8!null d.v:10!null + │ ├── constraint: /10/8: [/1 - /1] + │ ├── key: (8) + │ └── fd: ()-->(10) + ├── distinct-on + │ ├── columns: a.u:6 + │ ├── grouping columns: a.u:6 + │ ├── internal-ordering: +6 + │ ├── key: (6) + │ └── scan a@u + │ ├── columns: a.u:6 + │ └── ordering: +6 + └── filters + └── a.u:6 = d.u:2 [outer=(2,6), constraints=(/2: (/NULL - ]; /6: (/NULL - ]), fd=(2)==(6), (6)==(2)] + +# Don't apply to queries without strict keys. +opt expect-not=GenerateUnionSelects +SELECT u, v FROM d WHERE u = 1 OR v = 1 +---- +select + ├── columns: u:2 v:3 + ├── scan d + │ └── columns: u:2 v:3 + └── filters + └── (u:2 = 1) OR (v:3 = 1) [outer=(2,3)] + +# Don't apply to disjunctions with identical colsets on the left and right. +opt expect-not=GenerateUnionSelects +SELECT k FROM d WHERE u = 1 OR u = 5 +---- +project + ├── columns: k:1!null + ├── key: (1) + └── scan d@u + ├── columns: k:1!null u:2!null + ├── constraint: /2/1 + │ ├── [/1 - /1] + │ └── [/5 - /5] + ├── key: (1) + └── fd: (1)-->(2)