From d712a0a9657720115130165957e30aad39a0e91c Mon Sep 17 00:00:00 2001 From: Andrew Kimball Date: Tue, 1 Jan 2019 11:41:14 -0700 Subject: [PATCH] opt: Tighten up InlineProjectInProject rule Allow inlining nested Project in case where there are duplicate refs to same inner passthrough column. Previously, this case prevented inlining. However, only duplicate references to inner *synthesized* columns need to be detected. Release note: None --- pkg/sql/opt/exec/execbuilder/testdata/select | 27 ++++++---------- pkg/sql/opt/norm/inline.go | 26 +++++++++------ pkg/sql/opt/norm/rules/inline.opt | 17 +++++----- pkg/sql/opt/norm/testdata/rules/inline | 34 ++++++++++++++++++++ 4 files changed, 70 insertions(+), 34 deletions(-) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/select b/pkg/sql/opt/exec/execbuilder/testdata/select index 5b0e7ddeb62a..b51bf3bdf376 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/select +++ b/pkg/sql/opt/exec/execbuilder/testdata/select @@ -984,14 +984,11 @@ CREATE TABLE abcd (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b)) query TTTTT EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b + c AS x FROM abcd) ORDER BY a ---- -render · · ("?column?") · - │ render 0 a + x · · - └── render · · (x, a) +a - │ render 0 b + c · · - │ render 1 a · · - └── scan · · (a, b, c) +a -· table abcd@primary · · -· spans ALL · · +render · · ("?column?") · + │ render 0 a + (b + c) · · + └── scan · · (a, b, c) +a +· table abcd@primary · · +· spans ALL · · query TTTTT EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b, a + b + c AS x FROM abcd) ORDER BY b @@ -1011,15 +1008,11 @@ render · · ("?column?") · query TTTTT EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b, a + b + c AS x FROM abcd) ORDER BY a DESC, b DESC ---- -render · · ("?column?") · - │ render 0 a + x · · - └── render · · (x, a, b) -a,-b - │ render 0 c + (a + b) · · - │ render 1 a · · - │ render 2 b · · - └── revscan · · (a, b, c) -a,-b -· table abcd@primary · · -· spans ALL · · +render · · ("?column?") · + │ render 0 a + (c + (a + b)) · · + └── revscan · · (a, b, c) -a,-b +· table abcd@primary · · +· spans ALL · · # Ensure that filter nodes (and filtered scan nodes) get populated with the correct ordering. query TTTTT diff --git a/pkg/sql/opt/norm/inline.go b/pkg/sql/opt/norm/inline.go index 56feec123d96..7ac218d06dba 100644 --- a/pkg/sql/opt/norm/inline.go +++ b/pkg/sql/opt/norm/inline.go @@ -20,20 +20,22 @@ import ( ) // HasDuplicateRefs returns true if the target projection expressions or -// passthrough columns reference any outer column more than one time, or if the -// projection expressions contain a correlated subquery. For example: +// passthrough columns reference any column in the given target set more than +// one time, or if the projection expressions contain a correlated subquery. +// For example: // // SELECT x+1, x+2, y FROM a // // HasDuplicateRefs would be true, since the x column is referenced twice. // // Correlated subqueries are disallowed since it introduces additional -// complexity for a case that's not too important for inlining. +// complexity for a case that's not too important for inlining. Also, skipping +// correlated subqueries minimizes expensive searching in deep trees. func (c *CustomFuncs) HasDuplicateRefs( - projections memo.ProjectionsExpr, passthrough opt.ColSet, + projections memo.ProjectionsExpr, passthrough opt.ColSet, targetCols opt.ColSet, ) bool { - // Start with copy of passthrough columns, as they each count as a ref. - refs := passthrough.Copy() + // Passthrough columns that reference a target column count as refs. + refs := passthrough.Intersection(targetCols) for i := range projections { item := &projections[i] if item.ScalarProps(c.mem).HasCorrelatedSubquery { @@ -41,13 +43,19 @@ func (c *CustomFuncs) HasDuplicateRefs( return true } - // When a column reference is found, add it to the refs set. If the set - // already contains a reference to that column, then there is a duplicate. - // findDupRefs returns true if the subtree contains at least one duplicate. + // When a target column reference is found, add it to the refs set. If + // the set already contains a reference to that column, then there is a + // duplicate. findDupRefs returns true if the subtree contains at least + // one duplicate. var findDupRefs func(e opt.Expr) bool findDupRefs = func(e opt.Expr) bool { switch t := e.(type) { case *memo.VariableExpr: + // Ignore references to non-target columns. + if !targetCols.Contains(int(t.Col)) { + return false + } + // Count Variable references. if refs.Contains(int(t.Col)) { return true diff --git a/pkg/sql/opt/norm/rules/inline.opt b/pkg/sql/opt/norm/rules/inline.opt index b316a134814a..73138df83051 100644 --- a/pkg/sql/opt/norm/rules/inline.opt +++ b/pkg/sql/opt/norm/rules/inline.opt @@ -50,13 +50,13 @@ ) # InlineProjectInProject folds an inner Project operator into an outer Project -# that references each inner column no more than one time. If there are no -# duplicate references, then there's no benefit to keeping the multiple nested -# projections. This rule simplifies the relational expression tree and makes it -# more likely that other normalization rules will match. +# that references each inner synthesized column no more than one time. If there +# are no duplicate references, then there's no benefit to keeping the multiple +# nested projections. This rule simplifies the relational expression tree and +# makes it more likely that other normalization rules will match. # -# This rule is low priority so that it runs after the EliminateProjectProject -# rule, since that rule is cheaper to match and replace. +# This rule is low priority so that it runs after the MergeProjects rule, since +# that rule is cheaper to match and replace. # # Example: # SELECT x2*2 FROM (SELECT x+1 AS x2 FROM xy) @@ -65,9 +65,10 @@ # [InlineProjectInProject, Normalize, LowPriority] (Project - $input:(Project) + $input:(Project * $innerProjections:*) $projections:* - $passthrough:* & ^(HasDuplicateRefs $projections $passthrough) + $passthrough:* & + ^(HasDuplicateRefs $projections $passthrough (ProjectionCols $innerProjections)) ) => (InlineProjectProject $input $projections $passthrough) diff --git a/pkg/sql/opt/norm/testdata/rules/inline b/pkg/sql/opt/norm/testdata/rules/inline index e09d3f55cdf0..997036977ef1 100644 --- a/pkg/sql/opt/norm/testdata/rules/inline +++ b/pkg/sql/opt/norm/testdata/rules/inline @@ -239,6 +239,40 @@ project ├── k != 1 [type=bool, outer=(1)] └── i + 1 [type=int, outer=(2)] +# Multiple synthesized column references to same inner passthrough column +# (should still inline). +opt expect=InlineProjectInProject +SELECT x+1, x+2, y1+2 FROM (SELECT x, y+1 AS y1 FROM xy) +---- +project + ├── columns: "?column?":4(int) "?column?":5(int) "?column?":6(int) + ├── scan xy + │ ├── columns: x:1(int!null) y:2(int) + │ ├── key: (1) + │ └── fd: (1)-->(2) + └── projections + ├── x + 1 [type=int, outer=(1)] + ├── x + 2 [type=int, outer=(1)] + └── (y + 1) + 2 [type=int, outer=(2)] + +# Synthesized and passthrough references to same inner passthrough column +# (should still inline). +opt expect=InlineProjectInProject +SELECT x+y1 FROM (SELECT x, y+1 AS y1 FROM xy) ORDER BY x +---- +project + ├── columns: "?column?":4(int) [hidden: x:1(int!null)] + ├── key: (1) + ├── fd: (1)-->(4) + ├── ordering: +1 + ├── scan xy + │ ├── columns: x:1(int!null) y:2(int) + │ ├── key: (1) + │ ├── fd: (1)-->(2) + │ └── ordering: +1 + └── projections + └── x + (y + 1) [type=int, outer=(1,2)] + # Inline multiple expressions. opt expect=InlineProjectInProject SELECT expr+1 AS r, i, expr2 || 'bar' AS s FROM (SELECT k+1 AS expr, s || 'foo' AS expr2, i FROM a)