Skip to content

Commit

Permalink
Merge #48298
Browse files Browse the repository at this point in the history
48298: opt: add rule to fold remappings of ValuesExpr columns r=andy-kimball a=DrewKimball

Previously, the optimizer had no rule to fold projected remappings of columns
from a ValuesExpr.

This patch adds a rule that folds any ProjectionsItems into the passthrough
set if:

1. The projection does nothing but remap a column from the input.

2. The column being remapped is not itself in the passthrough set.

The Values output columns are replaced by the corresponding columns
projected by the folded ProjectionsItems so that logical equivalency is
preserved.

Example:
```
project
 ├── columns: x:2!null
 ├── values
 │    ├── columns: column1:1!null
 │    ├── cardinality: [2 - 2]
 │    ├── (1,)
 │    └── (2,)
 └── projections
      └── column1:1 [as=x:2, outer=(1)]
  =>
project
 ├── columns: x:2!null
 └── values
      ├── columns: x:2!null
      ├── cardinality: [2 - 2]
      ├── (1,)
      └── (2,)
```
In this example, the project can now be removed altogether.

Fixes: #48083

Release note: None

Co-authored-by: Drew Kimball <[email protected]>
  • Loading branch information
craig[bot] and DrewKimball committed May 6, 2020
2 parents 683f0d5 + d1e491d commit 80fdcc7
Show file tree
Hide file tree
Showing 7 changed files with 496 additions and 122 deletions.
30 changes: 11 additions & 19 deletions pkg/sql/opt/memo/testdata/stats/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -483,28 +483,20 @@ project
│ │ ├── stats: [rows=1.29289322, distinct(4)=1.29289322, null(4)=1, distinct(5)=1.29289322, null(5)=0]
│ │ ├── key: (4)
│ │ ├── fd: (4)-->(5)
│ │ ├── project
│ │ ├── select
│ │ │ ├── columns: a:3(bool!null) b:4(int)
│ │ │ ├── cardinality: [0 - 3]
│ │ │ ├── stats: [rows=1.5, distinct(4)=1.29289322, null(4)=1]
│ │ │ ├── stats: [rows=1.5, distinct(3)=1, null(3)=0, distinct(4)=1.29289322, null(4)=1]
│ │ │ ├── fd: ()-->(3)
│ │ │ ├── select
│ │ │ │ ├── columns: column1:1(bool!null) column2:2(int)
│ │ │ │ ├── cardinality: [0 - 3]
│ │ │ │ ├── stats: [rows=1.5, distinct(1)=1, null(1)=0, distinct(2)=1.29289322, null(2)=1]
│ │ │ │ ├── fd: ()-->(1)
│ │ │ │ ├── values
│ │ │ │ │ ├── columns: column1:1(bool!null) column2:2(int)
│ │ │ │ │ ├── cardinality: [3 - 3]
│ │ │ │ │ ├── stats: [rows=3, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=2]
│ │ │ │ │ ├── (true, NULL) [type=tuple{bool, int}]
│ │ │ │ │ ├── (false, NULL) [type=tuple{bool, int}]
│ │ │ │ │ └── (true, 5) [type=tuple{bool, int}]
│ │ │ │ └── filters
│ │ │ │ └── column1:1 [type=bool, outer=(1), constraints=(/1: [/true - /true]; tight), fd=()-->(1)]
│ │ │ └── projections
│ │ │ ├── column1:1 [as=a:3, type=bool, outer=(1)]
│ │ │ └── column2:2 [as=b:4, type=int, outer=(2)]
│ │ │ ├── values
│ │ │ │ ├── columns: a:3(bool!null) b:4(int)
│ │ │ │ ├── cardinality: [3 - 3]
│ │ │ │ ├── stats: [rows=3, distinct(3)=2, null(3)=0, distinct(4)=2, null(4)=2]
│ │ │ │ ├── (true, NULL) [type=tuple{bool, int}]
│ │ │ │ ├── (false, NULL) [type=tuple{bool, int}]
│ │ │ │ └── (true, 5) [type=tuple{bool, int}]
│ │ │ └── filters
│ │ │ └── a:3 [type=bool, outer=(3), constraints=(/3: [/true - /true]; tight), fd=()-->(3)]
│ │ └── aggregations
│ │ └── bool-or [as=bool_or:5, type=bool, outer=(3)]
│ │ └── a:3 [type=bool]
Expand Down
142 changes: 141 additions & 1 deletion pkg/sql/opt/norm/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1313,13 +1313,153 @@ func (c *CustomFuncs) FoldTupleColumnAccess(
}

// Construct and return a new ProjectionsExpr using the new ColumnIDs.
for i, projection := range projections {
for i := range projections {
projection := &projections[i]
newProjections[i] = c.f.ConstructProjectionsItem(
replace(projection.Element).(opt.ScalarExpr), projection.Col)
}
return newProjections
}

// CanPushColumnRemappingIntoValues reports whether the given projections
// contain at least one item that can be folded into the Values input. An item
// qualifies when both of the following are true:
//
//   1. Its element is a bare variable that references an output column of the
//      given ValuesExpr.
//
//   2. That Values output column is not a member of the passthrough set.
//
// The values argument must be a *memo.ValuesExpr; the unchecked type assertion
// panics otherwise.
func (c *CustomFuncs) CanPushColumnRemappingIntoValues(
	projections memo.ProjectionsExpr, passthrough opt.ColSet, values memo.RelExpr,
) bool {
	valuesCols := values.(*memo.ValuesExpr).Relational().OutputCols
	for i := range projections {
		ref, isVar := projections[i].Element.(*memo.VariableExpr)
		if !isVar {
			// Anything other than a plain column reference is not a remapping.
			continue
		}
		if valuesCols.Contains(ref.Col) && !passthrough.Contains(ref.Col) {
			return true
		}
	}
	return false
}

// PushColumnRemappingIntoValues rewrites a Project over a Values so that
// projections which merely rename a Values output column are absorbed into the
// Values operator itself. Each absorbed projection's output column takes the
// place of the Values column it referenced, and that output column is added to
// the passthrough set of the new Project.
//
// Example:
//
//   project
//    ├── columns: x:2!null
//    ├── values
//    │    ├── columns: column1:1!null
//    │    ├── cardinality: [2 - 2]
//    │    ├── (1,)
//    │    └── (2,)
//    └── projections
//         └── column1:1 [as=x:2, outer=(1)]
//   =>
//   project
//    ├── columns: x:2!null
//    └── values
//         ├── columns: x:2!null
//         ├── cardinality: [2 - 2]
//         ├── (1,)
//         └── (2,)
//
// The rewrite exists to unblock other rules. In the example above,
// EliminateProject can now drop the Project entirely.
//
// The oldInput argument must be a *memo.ValuesExpr; the unchecked type
// assertion panics otherwise.
func (c *CustomFuncs) PushColumnRemappingIntoValues(
	oldInput memo.RelExpr, oldProjections memo.ProjectionsExpr, oldPassthrough opt.ColSet,
) memo.RelExpr {
	oldValues := oldInput.(*memo.ValuesExpr)
	valuesOutCols := oldValues.Relational().OutputCols
	newPassthrough := oldPassthrough.Copy()

	// remapped records, for each Values output column that gets absorbed, the
	// projected column that takes its place. A zero value means "no replacement
	// assigned".
	remapped := make(map[opt.ColumnID]opt.ColumnID)

	// Partition the old projections: items that are pure remappings are folded
	// (recorded in remapped and promoted to passthrough columns); everything
	// else is carried over into kept.
	var kept memo.ProjectionsExpr
	for i := range oldProjections {
		item := &oldProjections[i]

		// An item is folded only when its element is a variable that refers to a
		// Values output column which has no replacement yet and is not itself a
		// passthrough column.
		if ref, isVar := item.Element.(*memo.VariableExpr); isVar {
			src := ref.Col
			if valuesOutCols.Contains(src) && remapped[src] == 0 && !newPassthrough.Contains(src) {
				remapped[src] = item.Col
				newPassthrough.Add(item.Col)
				// No ProjectionsItem is emitted for a folded remapping.
				continue
			}
		}

		// Not foldable. Keep the item; any references it makes to replaced
		// columns are rewritten below.
		kept = append(kept, *item)
	}

	// substitute walks an expression tree and swaps every reference to a
	// replaced Values column for a reference to its replacement. This matters
	// for queries such as:
	//
	//   WITH t AS (SELECT x, x FROM (VALUES (1)) f(x)) SELECT * FROM t;
	//
	// The Values column "x" is replaced by the first column of t, which becomes
	// a passthrough column. The second column of t still refers to "x" and must
	// be redirected to that new passthrough column.
	var substitute ReplaceFunc
	substitute = func(e opt.Expr) opt.Expr {
		if ref, isVar := e.(*memo.VariableExpr); isVar {
			if to := remapped[ref.Col]; to != 0 {
				return c.f.ConstructVariable(to)
			}
		}
		return c.f.Replace(e, substitute)
	}

	// Rewrite the surviving projections in place.
	for i := range kept {
		rewritten := substitute(kept[i].Element).(opt.ScalarExpr)
		kept[i] = c.f.ConstructProjectionsItem(rewritten, kept[i].Col)
	}

	// Build the column list for the new Values: replaced columns get their
	// replacement ID, all others keep their original ID and position.
	newCols := make(opt.ColList, len(oldValues.Cols))
	for i, col := range oldValues.Cols {
		if to := remapped[col]; to != 0 {
			col = to
		}
		newCols[i] = col
	}

	// The new Values reuses the old rows but gets a fresh unique ID.
	private := &memo.ValuesPrivate{Cols: newCols, ID: c.f.Metadata().NextUniqueID()}
	newValues := c.f.ConstructValues(oldValues.Rows, private)

	// Wrap the new Values in a Project carrying the remaining projections and
	// the extended passthrough set.
	return c.f.ConstructProject(newValues, kept, newPassthrough)
}

// ----------------------------------------------------------------------
//
// Select Rules
Expand Down
41 changes: 41 additions & 0 deletions pkg/sql/opt/norm/rules/project.opt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,47 @@ $input
=>
(MergeProjectWithValues $projections $passthrough $input)

# PushColumnRemappingIntoValues folds ProjectionsItems into the passthrough set
# if they simply remap Values output columns that are not already in
# passthrough. The Values output columns are replaced with the corresponding
# columns projected by the folded ProjectionsItems.
#
# The rule only matches when the input is a Values operator and
# CanPushColumnRemappingIntoValues finds at least one such ProjectionsItem.
# ProjectionsItems that cannot be folded are kept in the resulting Project,
# with any references to replaced Values columns rewritten to the replacement
# columns.
#
# Example:
#
#   project
#    ├── columns: x:2!null
#    ├── values
#    │    ├── columns: column1:1!null
#    │    ├── cardinality: [2 - 2]
#    │    ├── (1,)
#    │    └── (2,)
#    └── projections
#         └── column1:1 [as=x:2, outer=(1)]
#   =>
#   project
#    ├── columns: x:2!null
#    └── values
#         ├── columns: x:2!null
#         ├── cardinality: [2 - 2]
#         ├── (1,)
#         └── (2,)
#
# This allows other rules to fire. In the example above, the project would now
# be removed by EliminateProject.
[PushColumnRemappingIntoValues, Normalize]
(Project
$input:(Values)
$projections:*
$passthrough:* &
(CanPushColumnRemappingIntoValues
$projections
$passthrough
$input
)
)
=>
(PushColumnRemappingIntoValues $input $projections $passthrough)

# FoldTupleAccessIntoValues replaces a Values with a single column that
# references a column of tuples with a new Values that has a column for each
# tuple index. This works as long as the surrounding Project does not reference
Expand Down
Loading

0 comments on commit 80fdcc7

Please sign in to comment.