Skip to content

Commit

Permalink
opt: add GenerateUnionSelects exploration rule for disjunction
Browse files Browse the repository at this point in the history
This commit adds a new exploration rule that can produce better query
plans for disjunctions (e.g. a = 1 OR b = 2). The rule transforms some
Select + Scan expressions with a disjunction filter into a Union of two
Select expressions, each with one side of the disjunction as a filter.
This can result in faster query plans in cases where two indexes cover
each side of the disjunction.

This rule only applies for Scan expressions that contain a strict key.

Fixes cockroachdb#2142

Release note (performance improvement): The query optimizer now produces
faster query plans for some disjunctions (OR expressions) by utilizing
multiple indexes.
  • Loading branch information
mgartner committed Apr 7, 2020
1 parent ab67b19 commit 807bf1e
Show file tree
Hide file tree
Showing 4 changed files with 514 additions and 26 deletions.
43 changes: 21 additions & 22 deletions pkg/sql/opt/norm/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,8 @@ func (c *CustomFuncs) CanMapOnSetOp(src *memo.FiltersItem) bool {
func (c *CustomFuncs) MapSetOpFilterLeft(
filter *memo.FiltersItem, set *memo.SetPrivate,
) opt.ScalarExpr {
// NOTE(review): this listing is a rendered diff without +/- markers. The
// mapSetOpFilter call directly below appears to be the removed line and the
// two statements after it its replacement — confirm against the repository.
return c.mapSetOpFilter(filter, set.OutCols, set.LeftCols)
// Pair each output column of the set operation with the left input column at
// the same list position, then rewrite the filter's column references
// through that map.
colMap := makeMapFromColLists(set.OutCols, set.LeftCols)
return c.MapFiltersItemCols(filter, colMap)
}

// MapSetOpFilterRight maps the filter onto the right expression by replacing
Expand All @@ -1441,34 +1442,32 @@ func (c *CustomFuncs) MapSetOpFilterLeft(
func (c *CustomFuncs) MapSetOpFilterRight(
filter *memo.FiltersItem, set *memo.SetPrivate,
) opt.ScalarExpr {
// NOTE(review): this listing is a rendered diff without +/- markers. The
// mapSetOpFilter call directly below appears to be the removed line and the
// two statements after it its replacement — confirm against the repository.
return c.mapSetOpFilter(filter, set.OutCols, set.RightCols)
// Pair each output column of the set operation with the right input column
// at the same list position, then rewrite the filter's column references
// through that map.
colMap := makeMapFromColLists(set.OutCols, set.RightCols)
return c.MapFiltersItemCols(filter, colMap)
}

// mapSetOpFilter maps filter expressions to dst by replacing occurrences of
// columns in src with corresponding columns in dst (the two lists must be of
// equal length).
//
// For each column in src that is not an outer column, SetMap replaces it with
// the corresponding column in dst.
//
// For example, consider this query:
//
// SELECT * FROM (SELECT x FROM a UNION SELECT y FROM b) WHERE x < 5
//
// If mapSetOpFilter is called on the left subtree of the Union, the filter
// x < 5 propagates to that side after mapping the column IDs appropriately.
// WLOG, If setMap is called on the right subtree, the filter x < 5 will be
// mapped similarly to y < 5 on the right side.
func (c *CustomFuncs) mapSetOpFilter(
filter *memo.FiltersItem, src opt.ColList, dst opt.ColList,
) opt.ScalarExpr {
// Map each column in src to one column in dst to map the
// filters appropriately.
// makeMapFromColLists builds a map from the column IDs in src to the column
// IDs in dst. Columns are paired purely by position: the i-th entry of src
// becomes a key whose value is the i-th entry of dst.
//
// The two lists must have the same length; otherwise the function panics with
// an assertion failure.
func makeMapFromColLists(src opt.ColList, dst opt.ColList) util.FastIntMap {
	if len(src) != len(dst) {
		panic(errors.AssertionFailedf("src and dst must have the same length, src: %v, dst: %v", src, dst))
	}

	var mapping util.FastIntMap
	for i := range src {
		// FastIntMap is keyed and valued by plain ints, so convert both IDs.
		mapping.Set(int(src[i]), int(dst[i]))
	}
	return mapping
}

// MapFiltersItemCols maps filter expressions by replacing occurrences of
// the keys of colMap with the corresponding values. Outer columns are not
// replaced.
func (c *CustomFuncs) MapFiltersItemCols(
filter *memo.FiltersItem, colMap util.FastIntMap,
) opt.ScalarExpr {
// Recursively walk the scalar sub-tree looking for references to columns
// that need to be replaced and then replace them appropriately.
var replace ReplaceFunc
Expand Down
91 changes: 91 additions & 0 deletions pkg/sql/opt/xform/custom_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -2268,6 +2268,97 @@ func (c *CustomFuncs) MakeOrderingChoiceFromColumn(
return oc
}

// DuplicateScanPrivate returns a new ScanPrivate that scans the same table as
// the input but under a freshly added metadata table, so the result carries a
// new table ID and new column IDs.
//
// Only the Table and Cols fields are populated in the result. Other scan
// properties (e.g. constraints) are not copied, which is why the input must be
// a canonical ScanPrivate; the function panics with an assertion failure if it
// is not.
func (c *CustomFuncs) DuplicateScanPrivate(sp *memo.ScanPrivate) *memo.ScanPrivate {
	if !c.IsCanonicalScan(sp) {
		panic(errors.AssertionFailedf("input ScanPrivate must be canonical: %v", sp))
	}

	// Register the same table a second time to obtain a fresh table ID.
	md := c.e.mem.Metadata()
	origMeta := md.TableMeta(sp.Table)
	newTabID := md.AddTable(origMeta.Table, &origMeta.Alias)

	// Translate every scanned column to the column of the new table that sits
	// at the same ordinal position.
	var newCols opt.ColSet
	for col, ok := sp.Cols.Next(0); ok; col, ok = sp.Cols.Next(col + 1) {
		ordinal := origMeta.MetaID.ColumnOrdinal(col)
		newCols.Add(newTabID.ColumnID(ordinal))
	}

	dup := &memo.ScanPrivate{}
	dup.Table = newTabID
	dup.Cols = newCols
	return dup
}

// MapScanFilterCols returns a new FiltersExpr in which every column ID from
// src.Cols that appears in the input filters is replaced by a column ID from
// dst.Cols.
//
// NOTE: Columns are paired by their relative position when iterating the two
// ColSets, so every ColumnID in src must correspond to the ColumnID in dst at
// the same position. For example, if src and dst are (1, 5, 6) and
// (7, 12, 15), the following mapping is applied:
//
//   1 => 7
//   5 => 12
//   6 => 15
//
// The function panics with an assertion failure if src.Cols and dst.Cols do
// not contain the same number of columns.
func (c *CustomFuncs) MapScanFilterCols(
	filters memo.FiltersExpr, src *memo.ScanPrivate, dst *memo.ScanPrivate,
) memo.FiltersExpr {
	if src.Cols.Len() != dst.Cols.Len() {
		panic(errors.AssertionFailedf(
			"src and dst must have the same number of columns, src.Cols: %v, dst.Cols: %v",
			src.Cols,
			dst.Cols,
		))
	}

	// Walk both ColSets in lockstep, pairing the n-th column of src with the
	// n-th column of dst.
	var colMap util.FastIntMap
	srcCol, more := src.Cols.Next(0)
	dstCol, _ := dst.Cols.Next(0)
	for more {
		colMap.Set(int(srcCol), int(dstCol))
		dstCol, _ = dst.Cols.Next(dstCol + 1)
		srcCol, more = src.Cols.Next(srcCol + 1)
	}

	// Remap each filter item and wrap the result in a new FiltersItem.
	mapped := make([]memo.FiltersItem, len(filters))
	for i := range filters {
		remapped := c.MapFiltersItemCols(&filters[i], colMap)
		mapped[i] = c.e.f.ConstructFiltersItem(remapped)
	}

	return mapped
}

// ExprOuterCols returns the outer columns of the given expression.
//
// The columns are computed by building shared properties for expr on the spot
// (memo.BuildSharedProps) rather than by reading cached shared properties,
// because shared properties are not cached for all Expr types.
func (c *CustomFuncs) ExprOuterCols(expr opt.Expr) opt.ColSet {
	var shared props.Shared
	memo.BuildSharedProps(expr, &shared)
	outer := shared.OuterCols
	return outer
}

// MakeSetPrivateForUnionSelects builds the SetPrivate for a set operation
// whose left input, right input, and output columns are taken from the Cols
// of the left, right, and out ScanPrivates respectively. Each column list is
// produced by converting the corresponding ColSet with opt.ColSetToList.
func (c *CustomFuncs) MakeSetPrivateForUnionSelects(
	left, right, out *memo.ScanPrivate,
) *memo.SetPrivate {
	leftCols := opt.ColSetToList(left.Cols)
	rightCols := opt.ColSetToList(right.Cols)
	outCols := opt.ColSetToList(out.Cols)

	return &memo.SetPrivate{LeftCols: leftCols, RightCols: rightCols, OutCols: outCols}
}

// scanIndexIter is a helper struct that supports iteration over the indexes
// of a Scan operator table. For example:
//
Expand Down
73 changes: 69 additions & 4 deletions pkg/sql/opt/xform/rules/select.opt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# and examples.
[GenerateConstrainedScans, Explore]
(Select
(Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate))
$filters:*
(Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate))
$filters:*
)
=>
(GenerateConstrainedScans $scanPrivate $filters)
Expand All @@ -22,8 +22,73 @@
# be serviced by an inverted index.
[GenerateInvertedIndexScans, Explore]
(Select
(Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate))
$filters:*
(Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate))
$filters:*
)
=>
(GenerateInvertedIndexScans $scanPrivate $filters)

# GenerateUnionSelects splits disjunctions (Or expressions) into a Union of two
# Select expressions, the first containing the left sub-expression of the Or
# expression and the second containing the right sub-expression. All other
# filter items in the original expression are preserved in the new Select
# expressions.
#
# This can produce better query plans in cases where indexes cover both sides of
# the Or expression. The execution plan can use both indexes to satisfy both
# sides of the disjunction and union the results together.
#
# The left Select reuses the matched Scan as its input. The right Select reads
# from a new Scan built from a duplicate of the matched ScanPrivate
# (DuplicateScanPrivate), and its filters are remapped from the original scan's
# columns to the duplicate's columns (MapScanFilterCols). The Union's output
# columns are the original scan's columns.
#
# NOTE(review): the Or item is additionally guarded by
# ^(ColsAreEqual (ExprOuterCols $left) (ExprOuterCols $right)), which looks
# like it skips disjunctions whose two sides reference the same set of
# columns - confirm against the optgen semantics of ^ and ColsAreEqual.
#
# Note that this rule only matches Selects with canonical scans. Therefore scan
# constraints do not need to be duplicated in the left and right scans of the
# union.
#
# Also note that this rule only matches Selects whose input Scan has a strict
# key (HasStrictKey $input). This is required to prevent the generated Union
# from de-duplicating rows that have the same selected values. For example,
# consider the following:
#
# CREATE TABLE t (k INT PRIMARY KEY, a INT, b INT)
# INSERT INTO t VALUES (1, 1, 3)
# INSERT INTO t VALUES (2, 1, 3)
#
# The expected result of the following Select query is 2 rows, each with
# values (1, 3).
#
# SELECT a, b FROM t WHERE a = 1 OR b = 3
#
# However, Union de-duplicates all tuples with the same set of values. So, the
# query below returns only a single row.
#
# SELECT a, b FROM t WHERE a = 1
# UNION
# SELECT a, b FROM t WHERE b = 3
#
# With a key in the output columns, each input row to the Union is guaranteed to
# be unique, and therefore will not be incorrectly de-duplicated.
[GenerateUnionSelects, Explore]
(Select
$input:(Scan
$scanPrivate:* & (IsCanonicalScan $scanPrivate)
) & (HasStrictKey $input)
$filters:[
...
$item:(FiltersItem (Or $left:* $right:*)) &
^(ColsAreEqual (ExprOuterCols $left) (ExprOuterCols $right))
...
]
)
=>
(Union
(Select
$input
(ReplaceFiltersItem $filters $item $left)
)
(Select
(Scan $rightScan:(DuplicateScanPrivate $scanPrivate))
(MapScanFilterCols
(ReplaceFiltersItem $filters $item $right)
$scanPrivate
$rightScan
)
)
(MakeSetPrivateForUnionSelects $scanPrivate $rightScan $scanPrivate)
)
Loading

0 comments on commit 807bf1e

Please sign in to comment.