Skip to content

Commit

Permalink
opt: detect and extract projected tuple equalities
Browse files Browse the repository at this point in the history
This commit adds a new join normalisation rule that detects join conditions equating a variable bound to a projected tuple with a tuple. The rule splits each such condition into multiple equalities on the underlying columns and hoists the project above the join. This simplifies a number of queries in which the projected tuple previously prevented projections from being merged.

Resolves: cockroachdb#43198

Release justification: low-risk performance improvement
  • Loading branch information
RoryBlevins committed Mar 17, 2020
1 parent b6d26e2 commit 0847229
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 24 deletions.
95 changes: 95 additions & 0 deletions pkg/sql/opt/norm/join.go
Original file line number Diff line number Diff line change
Expand Up @@ -798,3 +798,98 @@ func (c *CustomFuncs) ExtractJoinEquality(
// Project away the synthesized columns.
return c.f.ConstructProject(join, memo.EmptyProjectionsExpr, leftCols.Union(rightCols))
}

// CanExtractTupleEquality returns true if at least one of the given filters
// references a column produced by the given projections, and every filter that
// does so is an equality of the form "var = (e1, ..., en)" where:
//
//   - var is a projected column whose projection is itself a tuple, and
//   - var is the only projected column the filter references.
//
// The second condition is needed because the rules that call this function
// hoist the projections above the join. If the right-hand tuple referenced
// another projected column, the rewritten filter would refer to a column that
// neither join input produces any more.
func (c *CustomFuncs) CanExtractTupleEquality(
	projections memo.ProjectionsExpr,
	filters memo.FiltersExpr,
) bool {
	projectedTupleCols := make(map[opt.ColumnID]memo.ProjectionsItem, len(projections))
	var projectedColsSet opt.ColSet
	for _, p := range projections {
		projectedColsSet.Add(p.Col)
		if _, isTuple := p.Element.(*memo.TupleExpr); isTuple {
			projectedTupleCols[p.Col] = p
		}
	}

	hasProjectTupleEquality := false
	for i := range filters {
		f := &filters[i]

		// We're only interested in filters which reference a projected column.
		referenced := f.ScalarProps().OuterCols.Intersection(projectedColsSet)
		if referenced.Empty() {
			continue
		}
		if !isValidTupleEqualityExpression(f, projectedTupleCols) {
			return false
		}
		// The left-hand variable accounts for exactly one projected column. Any
		// further projected column can only come from the right-hand tuple,
		// which must not depend on the projections being hoisted.
		if referenced.Len() != 1 {
			return false
		}
		hasProjectTupleEquality = true
	}
	return hasProjectTupleEquality
}

// isValidTupleEqualityExpression reports whether the filter's condition is an
// equality whose left operand is a variable that appears as a key in
// projectedCols and whose right operand is a tuple.
func isValidTupleEqualityExpression(f *memo.FiltersItem,
	projectedCols map[opt.ColumnID]memo.ProjectionsItem,
) bool {
	if f.Condition.Op() != opt.EqOp {
		return false
	}
	equality := f.Condition.(*memo.EqExpr)
	if equality.Left.Op() != opt.VariableOp || equality.Right.Op() != opt.TupleOp {
		return false
	}
	_, found := projectedCols[equality.Left.(*memo.VariableExpr).Col]
	return found
}

// RewriteTupleEquality replaces every filter of the form "var = (e1, ..., en)",
// where var is a column that the given projections define as a tuple
// (p1, ..., pn), with the n element-wise equalities "pi = ei". Filters that do
// not reference any projected tuple column are copied to the result unchanged.
//
// The normalization rules only call this after CanExtractTupleEquality has
// accepted the same projections and filters. A filter that references a
// projected tuple column without having the expected shape therefore indicates
// a bug, and is reported as an assertion failure rather than being silently
// dropped from the join condition.
func (c *CustomFuncs) RewriteTupleEquality(filters memo.FiltersExpr,
	projections memo.ProjectionsExpr,
) memo.FiltersExpr {
	// Index the tuple-valued projections by the column they produce.
	projectedTuples := map[opt.ColumnID]*memo.TupleExpr{}
	var projectedColsSet opt.ColSet
	for _, p := range projections {
		if tuple, isTuple := p.Element.(*memo.TupleExpr); isTuple {
			projectedTuples[p.Col] = tuple
			projectedColsSet.Add(p.Col)
		}
	}

	newFilters := make(memo.FiltersExpr, 0, len(filters))
	for _, f := range filters {
		if f.ScalarProps().OuterCols.Intersection(projectedColsSet).Empty() {
			// Unrelated to the projected tuples; keep as is.
			newFilters = append(newFilters, f)
			continue
		}

		eq, ok := f.Condition.(*memo.EqExpr)
		if !ok {
			panic(errors.AssertionFailedf("cannot extract tuple equality from non-equality comparision"))
		}

		leftVariable, leftOk := eq.Left.(*memo.VariableExpr)
		rightTuple, rightOk := eq.Right.(*memo.TupleExpr)
		if !leftOk || !rightOk {
			panic(errors.AssertionFailedf(
				"cannot extract tuple equality: expected a variable compared to a tuple",
			))
		}
		projectedTuple, isProjected := projectedTuples[leftVariable.Col]
		if !isProjected {
			panic(errors.AssertionFailedf(
				"cannot extract tuple equality: left variable is not a projected tuple",
			))
		}
		if len(projectedTuple.Elems) != len(rightTuple.Elems) {
			panic(errors.AssertionFailedf("cannot compare tuples of unequal lengths"))
		}

		// Split the tuple comparison into one equality per element.
		for i := range projectedTuple.Elems {
			newFilters = append(newFilters, c.f.ConstructFiltersItem(
				c.f.ConstructEq(projectedTuple.Elems[i], rightTuple.Elems[i]),
			))
		}
	}
	return newFilters
}
65 changes: 63 additions & 2 deletions pkg/sql/opt/norm/rules/join.opt
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ $left
#
# TODO(andyk): Add other join types.
[HoistJoinProjectRight, Normalize]
(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
$left:*
$right:(Project $input:* $projections:[])
$on:*
Expand All @@ -466,7 +466,7 @@ $left
# HoistJoinProjectLeft is the same as HoistJoinProjectRight, but for the left
# input of the join.
[HoistJoinProjectLeft, Normalize]
(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply
(InnerJoin | InnerJoinApply | LeftJoin | LeftJoinApply | SemiJoin
$left:(Project $input:* $projections:[])
$right:*
$on:*
Expand Down Expand Up @@ -532,6 +532,65 @@ $left
$private
)

# ExtractProjectedTupleEqualityLeft matches a join whose left input is a
# Project and whose ON condition contains an equality between a variable and a
# tuple, provided that CanExtractTupleEquality accepts the Project's
# projections together with the ON filters. The join is replaced by a Project
# over a join of the Project's input with the original right input, using the
# filters returned by RewriteTupleEquality. The new Project keeps the original
# projections and passes through the original passthrough columns plus the
# output columns of the right input. Hoisting the Project above the join can
# allow further simplification of Project operators.
#
# NOTE(review): the pattern also matches SemiJoin, yet the replacement always
# passes through (OutputCols $right). Presumably a semi-join does not output
# right-side columns - confirm that this passthrough set is valid in that case.
[ExtractProjectedTupleEqualityLeft, Normalize]
(InnerJoin | LeftJoin | SemiJoin
$left: (Project $input:* $projections:* $passthrough:*)
$right:*
$on:[
...
(FiltersItem (Eq (Variable) (Tuple))) &
(CanExtractTupleEquality
$projections
$on
)
...
]
$private:*
)
=>
(Project
((OpName)
$input
$right
(RewriteTupleEquality $on $projections)
$private
)
$projections
(UnionCols $passthrough (OutputCols $right))
)

# ExtractProjectedTupleEqualityRight is the same as
# ExtractProjectedTupleEqualityLeft, but for a Project on the right input of
# the join: the Project's input replaces the right input, and the hoisted
# Project passes through the original passthrough columns plus the output
# columns of the left input.
#
# NOTE(review): this hoists non-empty projections from the right side of a
# LeftJoin and of a SemiJoin, whereas HoistJoinProjectRight only matches
# $projections:[]. Presumably projections evaluated above a left join see
# NULL-extended rows, and a semi-join does not output the right-side columns
# the projections read - confirm that the rewrite is valid for both join types.
[ExtractProjectedTupleEqualityRight, Normalize]
(InnerJoin | LeftJoin | SemiJoin
$left: *
$right:(Project $input:* $projections:* $passthrough:*)
$on:[
...
(FiltersItem (Eq (Variable) (Tuple))) &
(CanExtractTupleEquality
$projections
$on
)
...
]
$private:*
)
=>
(Project
((OpName)
$left
$input
(RewriteTupleEquality $on $projections)
$private
)
$projections
(UnionCols $passthrough (OutputCols $left))
)

# ExtractJoinEqualities finds equality conditions such that one side only
# depends on left columns and the other only on right columns and pushes the
# expressions down into Project operators. The result is a join that has an
Expand Down Expand Up @@ -582,3 +641,5 @@ $left
(SortFilters $on)
$private
)


61 changes: 61 additions & 0 deletions pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -2911,3 +2911,64 @@ full-join (cross)
│ └── fd: (3)-->(4)
└── filters
└── (substring('', ')') = '') = (u:3 > 0) [outer=(3)]

exec-ddl
CREATE TABLE ab (
a INT,
b INT,
PRIMARY KEY (a,b)
)
----


exec-ddl
CREATE TABLE cd (
c INT,
d INT
)
----

exec-ddl
ALTER TABLE cd INJECT STATISTICS '[
{
"columns": [ "c" ],
"created_at": "2019-12-13 16:17:43.033228+00:00",
"distinct_count": 4,
"row_count": 4
}
]'
----

opt
SELECT a,b FROM ab, cd WHERE a=c AND b=d
----
project
├── columns: a:1!null b:2!null
└── inner-join (lookup ab)
├── columns: a:1!null b:2!null c:3!null d:4!null
├── key columns: [3 4] = [1 2]
├── lookup columns are key
├── fd: (1)==(3), (3)==(1), (2)==(4), (4)==(2)
├── scan cd
│ └── columns: c:3 d:4
└── filters (true)

opt
SELECT * FROM ab WHERE (a, b) IN (SELECT c, d FROM cd)
----
project
├── columns: a:1!null b:2!null
├── key: (1,2)
└── inner-join (lookup ab)
├── columns: a:1!null b:2!null c:3!null d:4!null
├── key columns: [3 4] = [1 2]
├── lookup columns are key
├── key: (3,4)
├── fd: (1)==(3), (3)==(1), (2)==(4), (4)==(2)
├── distinct-on
│ ├── columns: c:3 d:4
│ ├── grouping columns: c:3 d:4
│ ├── key: (3,4)
│ └── scan cd
│ └── columns: c:3 d:4
└── filters (true)
35 changes: 13 additions & 22 deletions pkg/sql/opt/norm/testdata/rules/scalar
Original file line number Diff line number Diff line change
Expand Up @@ -1051,32 +1051,23 @@ project
├── columns: k:1!null
├── key: (1)
└── semi-join (hash)
├── columns: k:1!null column10:10
├── columns: k:1!null i:2
├── key: (1)
├── fd: (1)-->(10)
├── project
│ ├── columns: column10:10 k:1!null
├── fd: (1)-->(2)
├── scan a
│ ├── columns: k:1!null i:2
│ ├── key: (1)
│ ├── fd: (1)-->(10)
│ ├── scan a
│ │ ├── columns: k:1!null i:2
│ │ ├── key: (1)
│ │ └── fd: (1)-->(2)
│ └── projections
│ └── (k:1, i:2) [as=column10:10, outer=(1,2)]
├── project
│ ├── columns: column9:9!null
│ └── fd: (1)-->(2)
├── values
│ ├── columns: column1:7!null column2:8!null
│ ├── cardinality: [3 - 3]
│ ├── values
│ │ ├── columns: column1:7!null column2:8!null
│ │ ├── cardinality: [3 - 3]
│ │ ├── (1, 1)
│ │ ├── (2, 2)
│ │ └── (3, 3)
│ └── projections
│ └── (column2:8, column1:7) [as=column9:9, outer=(7,8)]
│ ├── (1, 1)
│ ├── (2, 2)
│ └── (3, 3)
└── filters
└── column10:10 = column9:9 [outer=(9,10), constraints=(/9: (/NULL - ]; /10: (/NULL - ]), fd=(9)==(10), (10)==(9)]
├── column2:8 = k:1 [outer=(1,8), constraints=(/1: (/NULL - ]; /8: (/NULL - ]), fd=(1)==(8), (8)==(1)]
└── column1:7 = i:2 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]


# --------------------------------------------------
# SimplifyEqualsAnyTuple
Expand Down

0 comments on commit 0847229

Please sign in to comment.