Skip to content

Commit

Permalink
opt: Tighten up InlineProjectInProject rule
Browse files Browse the repository at this point in the history
Allow inlining nested Project in case where there are duplicate refs
to same inner passthrough column. Previously, this case prevented
inlining. However, only duplicate references to inner *synthesized*
columns need to be detected.

Release note: None
  • Loading branch information
andy-kimball committed Jan 1, 2019
1 parent a8afcab commit d712a0a
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 34 deletions.
27 changes: 10 additions & 17 deletions pkg/sql/opt/exec/execbuilder/testdata/select
Original file line number Diff line number Diff line change
Expand Up @@ -984,14 +984,11 @@ CREATE TABLE abcd (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b))
query TTTTT
EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b + c AS x FROM abcd) ORDER BY a
----
render · · ("?column?") ·
│ render 0 a + x · ·
└── render · · (x, a) +a
│ render 0 b + c · ·
│ render 1 a · ·
└── scan · · (a, b, c) +a
· table abcd@primary · ·
· spans ALL · ·
render · · ("?column?") ·
│ render 0 a + (b + c) · ·
└── scan · · (a, b, c) +a
· table abcd@primary · ·
· spans ALL · ·

query TTTTT
EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b, a + b + c AS x FROM abcd) ORDER BY b
Expand All @@ -1011,15 +1008,11 @@ render · · ("?column?") ·
query TTTTT
EXPLAIN (VERBOSE) SELECT a + x FROM (SELECT a, b, a + b + c AS x FROM abcd) ORDER BY a DESC, b DESC
----
render · · ("?column?") ·
│ render 0 a + x · ·
└── render · · (x, a, b) -a,-b
│ render 0 c + (a + b) · ·
│ render 1 a · ·
│ render 2 b · ·
└── revscan · · (a, b, c) -a,-b
· table abcd@primary · ·
· spans ALL · ·
render · · ("?column?") ·
│ render 0 a + (c + (a + b)) · ·
└── revscan · · (a, b, c) -a,-b
· table abcd@primary · ·
· spans ALL · ·

# Ensure that filter nodes (and filtered scan nodes) get populated with the correct ordering.
query TTTTT
Expand Down
26 changes: 17 additions & 9 deletions pkg/sql/opt/norm/inline.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,42 @@ import (
)

// HasDuplicateRefs returns true if the target projection expressions or
// passthrough columns reference any outer column more than one time, or if the
// projection expressions contain a correlated subquery. For example:
// passthrough columns reference any column in the given target set more than
// one time, or if the projection expressions contain a correlated subquery.
// For example:
//
// SELECT x+1, x+2, y FROM a
//
// HasDuplicateRefs would be true, since the x column is referenced twice.
//
// Correlated subqueries are disallowed since it introduces additional
// complexity for a case that's not too important for inlining.
// complexity for a case that's not too important for inlining. Also, skipping
// correlated subqueries minimizes expensive searching in deep trees.
func (c *CustomFuncs) HasDuplicateRefs(
projections memo.ProjectionsExpr, passthrough opt.ColSet,
projections memo.ProjectionsExpr, passthrough opt.ColSet, targetCols opt.ColSet,
) bool {
// Start with copy of passthrough columns, as they each count as a ref.
refs := passthrough.Copy()
// Passthrough columns that reference a target column count as refs.
refs := passthrough.Intersection(targetCols)
for i := range projections {
item := &projections[i]
if item.ScalarProps(c.mem).HasCorrelatedSubquery {
// Don't traverse the expression tree if there is a correlated subquery.
return true
}

// When a column reference is found, add it to the refs set. If the set
// already contains a reference to that column, then there is a duplicate.
// findDupRefs returns true if the subtree contains at least one duplicate.
// When a target column reference is found, add it to the refs set. If
// the set already contains a reference to that column, then there is a
// duplicate. findDupRefs returns true if the subtree contains at least
// one duplicate.
var findDupRefs func(e opt.Expr) bool
findDupRefs = func(e opt.Expr) bool {
switch t := e.(type) {
case *memo.VariableExpr:
// Ignore references to non-target columns.
if !targetCols.Contains(int(t.Col)) {
return false
}

// Count Variable references.
if refs.Contains(int(t.Col)) {
return true
Expand Down
17 changes: 9 additions & 8 deletions pkg/sql/opt/norm/rules/inline.opt
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@
)

# InlineProjectInProject folds an inner Project operator into an outer Project
# that references each inner column no more than one time. If there are no
# duplicate references, then there's no benefit to keeping the multiple nested
# projections. This rule simplifies the relational expression tree and makes it
# more likely that other normalization rules will match.
# that references each inner synthesized column no more than one time. If there
# are no duplicate references, then there's no benefit to keeping the multiple
# nested projections. This rule simplifies the relational expression tree and
# makes it more likely that other normalization rules will match.
#
# This rule is low priority so that it runs after the EliminateProjectProject
# rule, since that rule is cheaper to match and replace.
# This rule is low priority so that it runs after the MergeProjects rule, since
# that rule is cheaper to match and replace.
#
# Example:
# SELECT x2*2 FROM (SELECT x+1 AS x2 FROM xy)
Expand All @@ -65,9 +65,10 @@
#
[InlineProjectInProject, Normalize, LowPriority]
(Project
$input:(Project)
$input:(Project * $innerProjections:*)
$projections:*
$passthrough:* & ^(HasDuplicateRefs $projections $passthrough)
$passthrough:* &
^(HasDuplicateRefs $projections $passthrough (ProjectionCols $innerProjections))
)
=>
(InlineProjectProject $input $projections $passthrough)
34 changes: 34 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/inline
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,40 @@ project
├── k != 1 [type=bool, outer=(1)]
└── i + 1 [type=int, outer=(2)]

# Multiple synthesized column references to same inner passthrough column
# (should still inline).
opt expect=InlineProjectInProject
SELECT x+1, x+2, y1+2 FROM (SELECT x, y+1 AS y1 FROM xy)
----
project
├── columns: "?column?":4(int) "?column?":5(int) "?column?":6(int)
├── scan xy
│ ├── columns: x:1(int!null) y:2(int)
│ ├── key: (1)
│ └── fd: (1)-->(2)
└── projections
├── x + 1 [type=int, outer=(1)]
├── x + 2 [type=int, outer=(1)]
└── (y + 1) + 2 [type=int, outer=(2)]

# Synthesized and passthrough references to same inner passthrough column
# (should still inline).
opt expect=InlineProjectInProject
SELECT x+y1 FROM (SELECT x, y+1 AS y1 FROM xy) ORDER BY x
----
project
├── columns: "?column?":4(int) [hidden: x:1(int!null)]
├── key: (1)
├── fd: (1)-->(4)
├── ordering: +1
├── scan xy
│ ├── columns: x:1(int!null) y:2(int)
│ ├── key: (1)
│ ├── fd: (1)-->(2)
│ └── ordering: +1
└── projections
└── x + (y + 1) [type=int, outer=(1,2)]

# Inline multiple expressions.
opt expect=InlineProjectInProject
SELECT expr+1 AS r, i, expr2 || 'bar' AS s FROM (SELECT k+1 AS expr, s || 'foo' AS expr2, i FROM a)
Expand Down

0 comments on commit d712a0a

Please sign in to comment.