Skip to content

Commit

Permalink
opt: Hoist Exists operator and try to decorrelate
Browse files Browse the repository at this point in the history
Transform EXISTS clause in WHERE clauses into a SemiJoinApply or
AntiSemiJoinApply operator. Additional rules will attempt to decorrelate
the right operand of the Apply by pushing the Apply down through any Select
operator. Example:

  SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE a.x=b.x)
  =>
  SELECT * FROM a SEMI JOIN APPLY (SELECT * FROM b WHERE a.x=b.x)
  =>
  SELECT * FROM a SEMI JOIN b WHERE a.x=b.x

Release note: None
  • Loading branch information
andy-kimball committed Apr 22, 2018
1 parent 3d13e28 commit 36e5213
Show file tree
Hide file tree
Showing 11 changed files with 732 additions and 74 deletions.
69 changes: 63 additions & 6 deletions pkg/sql/opt/norm/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,23 @@ func (f *Factory) listOnlyHasNulls(list memo.ListID) bool {
return true
}

// removeListItem returns a new list that is a copy of the given list, except
// that it does not contain the given search item. If the list contains the item
// multiple times, then only the first instance is removed. If the list does not
// contain the item at all (including when the list is empty), then the given
// list is returned unchanged.
func (f *Factory) removeListItem(list memo.ListID, search memo.GroupID) memo.ListID {
	existingList := f.mem.LookupList(list)
	for i, item := range existingList {
		if item != search {
			continue
		}

		// Only allocate once the item is known to be present, so that the
		// result length (len-1) is always valid. Copy the items before and
		// after the match into fresh storage rather than aliasing the memo's
		// own slice.
		newList := make([]memo.GroupID, 0, len(existingList)-1)
		newList = append(newList, existingList[:i]...)
		newList = append(newList, existingList[i+1:]...)
		return f.mem.InternList(newList)
	}

	// Item not found: there is nothing to remove, so the original list already
	// has the desired contents.
	return list
}

// isSortedUniqueList returns true if the list is in sorted order, with no
// duplicates. See the comment for listSorter.compare for comparison rule
// details.
Expand Down Expand Up @@ -371,6 +388,13 @@ func (f *Factory) hasSubsetCols(left, right memo.GroupID) bool {
return f.outputCols(left).SubsetOf(f.outputCols(right))
}

// isScalarGroupBy reports whether the given grouping columns belong to a
// "scalar" GroupBy operator, i.e. one with an empty set of grouping columns.
// A scalar GroupBy always returns exactly one row, with any aggregate functions
// operating over the entire input expression.
func (f *Factory) isScalarGroupBy(groupingCols memo.PrivateID) bool {
	colSet := f.mem.LookupPrivate(groupingCols).(opt.ColSet)
	return colSet.Empty()
}

// ----------------------------------------------------------------------
//
// Project Rules
Expand Down Expand Up @@ -578,12 +602,6 @@ func (f *Factory) offsetNoCycle(input, limit memo.GroupID, ordering memo.Private
//
// ----------------------------------------------------------------------

// emptyGroupingCols reports whether the grouping column set stored under the
// given private ID for a GroupBy operator contains no columns.
func (f *Factory) emptyGroupingCols(cols memo.PrivateID) bool {
	colSet := f.mem.LookupPrivate(cols).(opt.ColSet)
	return colSet.Empty()
}

// isCorrelated returns true if variables in the source expression reference
// columns in the destination expression. For example:
// (InnerJoin
Expand Down Expand Up @@ -692,6 +710,18 @@ func (f *Factory) concatFilters(left, right memo.GroupID) memo.GroupID {
return f.ConstructFilters(f.InternList(conditions))
}

// isCorrelatedSubquery reports whether the given relational expression has any
// outer columns, meaning that it contains unbound variable references. For
// example:
//
//   SELECT * FROM a WHERE EXISTS(SELECT * FROM b WHERE b.x = a.x)
//
// Here, a.x inside the EXISTS subquery refers to a column that is outside the
// scope of the subquery, making it an "outer column" of that subquery (see the
// comment on RelationalProps.OuterCols for more details).
func (f *Factory) isCorrelatedSubquery(subquery memo.GroupID) bool {
	outerCols := f.lookupLogical(subquery).Relational.OuterCols
	return !outerCols.Empty()
}

// ----------------------------------------------------------------------
//
// GroupBy Rules
Expand All @@ -713,6 +743,33 @@ func (f *Factory) colsAreKey(cols memo.PrivateID, group memo.GroupID) bool {
return false
}

// ----------------------------------------------------------------------
//
// Join Rules
// Custom match and replace functions used with join.opt rules.
//
// ----------------------------------------------------------------------

// removeApply constructs the non-apply join operator that corresponds to the
// given apply join operator type, using the same left input, right input, and
// filter. This is used when decorrelating subqueries. It panics if op is not
// one of the apply join operator types handled below.
func (f *Factory) removeApply(op opt.Operator, left, right, filter memo.GroupID) memo.GroupID {
	switch op {
	case opt.InnerJoinApplyOp:
		return f.ConstructInnerJoin(left, right, filter)

	case opt.LeftJoinApplyOp:
		return f.ConstructLeftJoin(left, right, filter)

	case opt.RightJoinApplyOp:
		return f.ConstructRightJoin(left, right, filter)

	case opt.FullJoinApplyOp:
		return f.ConstructFullJoin(left, right, filter)

	case opt.SemiJoinApplyOp:
		return f.ConstructSemiJoin(left, right, filter)

	case opt.AntiJoinApplyOp:
		return f.ConstructAntiJoin(left, right, filter)

	default:
		// Any other operator has no non-apply counterpart here.
		panic(fmt.Sprintf("unexpected join operator: %v", op))
	}
}

// ----------------------------------------------------------------------
//
// Boolean Rules
Expand Down
10 changes: 8 additions & 2 deletions pkg/sql/opt/norm/rules/citations.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,11 @@ further information, and in some cases proofs, can be found.
ACM Trans. Database Syst.. 22. 43-73. 10.1145/244810.244812.
https://www.researchgate.net/publication/220225172_Outerjoin_Simplification_and_Reordering_for_Query_Optimization

[2] M. M. Joshi and C. A. Galindo-Legaria. Properties of the GroupBy/Aggregate
relational operator. Technical report, Microsoft, 2001. MSR-TR-2001-13.
[2] M. M. Joshi and C. A. Galindo-Legaria.
Properties of the GroupBy/Aggregate relational operator.
Technical report, Microsoft, 2001. MSR-TR-2001-13.

[3] Galindo-Legaria, C.A. & Joshi, Milind. (2001).
Orthogonal Optimization of Subqueries and Aggregation.
Sigmod Record. 30. 571-581. 10.1145/375663.375748.
http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.563.8492&rep=rep1&type=pdf
40 changes: 40 additions & 0 deletions pkg/sql/opt/norm/rules/join.opt
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,43 @@
)
(Filters (ExtractCorrelatedConditions $list (OutputCols $left)))
)

# DecorrelateJoin maps an apply join into the corresponding join without an
# apply if the right side of the join is not correlated with the left side.
# This allows the optimizer to consider additional physical join operators that
# are unable to handle correlated inputs.
#
# NOTE: Keep this before other decorrelation patterns, as if the correlated
# join can be removed first, it avoids unnecessarily matching other
# patterns that only exist to get to this pattern.
#
# Citations: [3]
[DecorrelateJoin, Normalize]
(JoinApply
$left:*
$right:* & ^(IsCorrelated $right $left)
$on:*
)
=>
(RemoveApply (OpName) $left $right $on)

# TryDecorrelateSelect "pushes down" the join apply into the select operator,
# in order to eliminate any correlation between the select filter list and the
# left side of the join, and also to keep "digging" down to find and eliminate
# other unnecessary correlation. Eventually, the hope is to trigger the
# DecorrelateJoin pattern to turn the JoinApply operator into a non-apply Join
# operator.
#
# Citations: [3]
[TryDecorrelateSelect, Normalize]
(InnerJoinApply | LeftJoinApply | SemiJoinApply | AntiJoinApply
$left:*
$right:(Select $input:* $filter:*) & (IsCorrelated $right $left)
$on:*
)
=>
((OpName)
$left
$input
(ConcatFilters $on $filter)
)
21 changes: 21 additions & 0 deletions pkg/sql/opt/norm/rules/scalar.opt
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,24 @@
)
=>
(Null (BoolType))

# EliminateExistsProject discards a Project input to the Exists operator. The
# Project operator never changes the row cardinality of its input, and row
# cardinality is the only thing that Exists cares about, so Project is a no-op.
[EliminateExistsProject, Normalize]
(Exists (Project $input:*)) => (Exists $input)

# EliminateExistsGroupBy discards a non-scalar GroupBy input to the Exists
# operator. While non-scalar GroupBy can change row cardinality, it always
# returns a non-empty set if its input is non-empty. Similarly, if its input is
# empty, then it returns the empty set. Therefore, it's a no-op for Exists.
[EliminateExistsGroupBy, Normalize]
(Exists
(GroupBy
$input:*
*
$groupingCols:* & ^(IsScalarGroupBy $groupingCols)
)
)
=>
(Exists $input)
41 changes: 37 additions & 4 deletions pkg/sql/opt/norm/rules/select.opt
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,8 @@

# PushSelectIntoGroupBy pushes a Select condition below a GroupBy in the case
# where it does not reference any of the aggregation columns. This only works
# if there are grouping columns. Otherwise, this is an instance of the "scalar"
# GroupBy, which returns only one row, and which exhibits different behavior if
# the input is empty:
# if this is not an instance of the "scalar" GroupBy, which returns only one
# row, and which exhibits different behavior if the input is empty:
# SELECT MAX(y) FROM a
#
# If "a" is empty, this returns a single row containing a null value. This is
Expand All @@ -191,7 +190,7 @@
(GroupBy
$input:*
$aggregations:*
$groupingCols:* & ^(EmptyGroupingCols $groupingCols)
$groupingCols:* & ^(IsScalarGroupBy $groupingCols)
)
(Filters $list:[ ... $condition:* & ^(IsCorrelated $condition $aggregations) ... ])
)
Expand All @@ -207,3 +206,37 @@
)
(Filters (ExtractCorrelatedConditions $list (OutputCols $aggregations)))
)

# HoistSelectExists extracts existential subqueries from Select filters,
# turning them into semi-joins. This eliminates the subquery, which is often
# expensive to execute and restricts the optimizer's plan choices.
#
# Only correlated subqueries are matched (see IsCorrelatedSubquery). The matched
# Exists condition is removed from the filter list, and the remaining conditions
# become the filter of the resulting SemiJoinApply operator.
[HoistSelectExists, Normalize]
(Select
$input:*
$filter:(Filters
$list:[ ... $exists:(Exists $subquery:* & (IsCorrelatedSubquery $subquery)) ... ]
)
)
=>
(SemiJoinApply
$input
$subquery
(Filters (RemoveListItem $list $exists))
)

# HoistSelectNotExists extracts negated existential subqueries (NOT EXISTS)
# from Select filters, turning them into anti-joins. This eliminates the
# subquery, which is often expensive to execute and restricts the optimizer's
# plan choices.
#
# Only correlated subqueries are matched (see IsCorrelatedSubquery). The matched
# Not(Exists) condition is removed from the filter list, and the remaining
# conditions become the filter of the resulting AntiJoinApply operator.
[HoistSelectNotExists, Normalize]
(Select
$input:*
$filter:(Filters
$list:[ ... $exists:(Not (Exists $subquery:* & (IsCorrelatedSubquery $subquery))) ... ]
)
)
=>
(AntiJoinApply
$input
$subquery
(Filters (RemoveListItem $list $exists))
)
Loading

0 comments on commit 36e5213

Please sign in to comment.