opt: add GenerateUnionSelects exploration rule for disjunction

This commit adds a new exploration rule that can produce better query plans for disjunctions (e.g. a = 1 OR b = 2). The rule transforms some Select + Scan expressions with a disjunction filter into a Union of two Select expressions, each with one side of the disjunction as a filter. This can result in faster query plans in cases where two indexes cover each side of the disjunction. This rule only applies for Scan expressions that contain a strict key. Fixes cockroachdb#2142 Release note (performance improvement): The query optimizer now produces faster query plans for some disjunctions (OR expressions) by utilizing multiple indexes.
mgartner · Apr 7, 2020 · 807bf1e · 807bf1e
1 parent ab67b19
commit 807bf1e
Show file tree

Hide file tree

Showing 4 changed files with 514 additions and 26 deletions.
diff --git a/pkg/sql/opt/norm/custom_funcs.go b/pkg/sql/opt/norm/custom_funcs.go
@@ -1431,7 +1431,8 @@ func (c *CustomFuncs) CanMapOnSetOp(src *memo.FiltersItem) bool {
 func (c *CustomFuncs) MapSetOpFilterLeft(
 	filter *memo.FiltersItem, set *memo.SetPrivate,
 ) opt.ScalarExpr {
-	return c.mapSetOpFilter(filter, set.OutCols, set.LeftCols)
+	colMap := makeMapFromColLists(set.OutCols, set.LeftCols)
+	return c.MapFiltersItemCols(filter, colMap)
 }
 
 // MapSetOpFilterRight maps the filter onto the right expression by replacing
@@ -1441,34 +1442,32 @@ func (c *CustomFuncs) MapSetOpFilterLeft(
 func (c *CustomFuncs) MapSetOpFilterRight(
 	filter *memo.FiltersItem, set *memo.SetPrivate,
 ) opt.ScalarExpr {
-	return c.mapSetOpFilter(filter, set.OutCols, set.RightCols)
+	colMap := makeMapFromColLists(set.OutCols, set.RightCols)
+	return c.MapFiltersItemCols(filter, colMap)
 }
 
-// mapSetOpFilter maps filter expressions to dst by replacing occurrences of
-// columns in src with corresponding columns in dst (the two lists must be of
-// equal length).
-//
-// For each column in src that is not an outer column, SetMap replaces it with
-// the corresponding column in dst.
-//
-// For example, consider this query:
-//
-//   SELECT * FROM (SELECT x FROM a UNION SELECT y FROM b) WHERE x < 5
-//
-// If mapSetOpFilter is called on the left subtree of the Union, the filter
-// x < 5 propagates to that side after mapping the column IDs appropriately.
-// WLOG, If setMap is called on the right subtree, the filter x < 5 will be
-// mapped similarly to y < 5 on the right side.
-func (c *CustomFuncs) mapSetOpFilter(
-	filter *memo.FiltersItem, src opt.ColList, dst opt.ColList,
-) opt.ScalarExpr {
-	// Map each column in src to one column in dst to map the
-	// filters appropriately.
+// makeMapFromColLists returns a map from each column ID in src to the column
+// ID at the same position in dst, e.g. the third item in src maps to the third
+// item in dst. It panics with an assertion error if the lists are not of equal
+// length.
+func makeMapFromColLists(src opt.ColList, dst opt.ColList) util.FastIntMap {
+	if len(src) != len(dst) {
+		panic(errors.AssertionFailedf("src and dst must have the same length, src: %v, dst: %v", src, dst))
+	}
+
 	var colMap util.FastIntMap
 	for colIndex, outColID := range src {
 		colMap.Set(int(outColID), int(dst[colIndex]))
 	}
+	return colMap
+}
 
+// MapFiltersItemCols maps filter expressions by replacing occurrences of
+// the keys of colMap with the corresponding values. Outer columns are not
+// replaced.
+func (c *CustomFuncs) MapFiltersItemCols(
+	filter *memo.FiltersItem, colMap util.FastIntMap,
+) opt.ScalarExpr {
 	// Recursively walk the scalar sub-tree looking for references to columns
 	// that need to be replaced and then replace them appropriately.
 	var replace ReplaceFunc

diff --git a/pkg/sql/opt/xform/custom_funcs.go b/pkg/sql/opt/xform/custom_funcs.go
@@ -2268,6 +2268,97 @@ func (c *CustomFuncs) MakeOrderingChoiceFromColumn(
 	return oc
 }
 
+// DuplicateScanPrivate re-adds the input's table to the metadata and returns a
+// new ScanPrivate that uses the resulting table ID and equivalent column IDs.
+//
+// Only Table and Cols are set on the result (e.g. constraints are not copied),
+// so it panics with an assertion error unless the input is a canonical scan.
+func (c *CustomFuncs) DuplicateScanPrivate(sp *memo.ScanPrivate) *memo.ScanPrivate {
+	if !c.IsCanonicalScan(sp) {
+		panic(errors.AssertionFailedf("input ScanPrivate must be canonical: %v", sp))
+	}
+
+	md := c.e.mem.Metadata()
+	tabMeta := md.TableMeta(sp.Table)
+	dupTabID := md.AddTable(tabMeta.Table, &tabMeta.Alias)
+
+	var dupTabColIDs opt.ColSet
+	cols := sp.Cols
+	for i, ok := cols.Next(0); ok; i, ok = cols.Next(i + 1) {
+		ord := tabMeta.MetaID.ColumnOrdinal(i)
+		dupColID := dupTabID.ColumnID(ord)
+		dupTabColIDs.Add(dupColID)
+	}
+
+	return &memo.ScanPrivate{
+		Table: dupTabID,
+		Cols:  dupTabColIDs,
+	}
+}
+
+// MapScanFilterCols returns a new FiltersExpr with every src column ID in the
+// filters replaced by a dst column ID. It panics if the column counts differ.
+//
+// NOTE: Every ColumnID in src must map to a ColumnID in dst with the same
+// relative position in the ColSets. For example, if src and dst are (1, 5, 6)
+// and (7, 12, 15), then the following mapping would be applied:
+//
+//   1 => 7
+//   5 => 12
+//   6 => 15
+func (c *CustomFuncs) MapScanFilterCols(
+	filters memo.FiltersExpr, src *memo.ScanPrivate, dst *memo.ScanPrivate,
+) memo.FiltersExpr {
+	if src.Cols.Len() != dst.Cols.Len() {
+		panic(errors.AssertionFailedf(
+			"src and dst must have the same number of columns, src.Cols: %v, dst.Cols: %v",
+			src.Cols,
+			dst.Cols,
+		))
+	}
+
+	// Pair up src and dst columns by iterating both ColSets in lockstep. The
+	// length check above ensures dst yields a column for each src column.
+	var colMap util.FastIntMap
+	dstCol, _ := dst.Cols.Next(0)
+	for srcCol, ok := src.Cols.Next(0); ok; srcCol, ok = src.Cols.Next(srcCol + 1) {
+		colMap.Set(int(srcCol), int(dstCol))
+		dstCol, _ = dst.Cols.Next(dstCol + 1)
+	}
+
+	// Map the columns of each filter in the FiltersExpr.
+	newFilters := make([]memo.FiltersItem, 0, len(filters))
+	for i := range filters {
+		expr := c.MapFiltersItemCols(&filters[i], colMap)
+		newFilters = append(newFilters, c.e.f.ConstructFiltersItem(expr))
+	}
+
+	return newFilters
+}
+
+// ExprOuterCols returns the outer columns of the given expression.
+//
+// Note that ExprOuterCols computes fresh shared properties for expr with
+// memo.BuildSharedProps rather than reading cached ones. This is because
+// shared properties are not cached for all Expr types.
+func (c *CustomFuncs) ExprOuterCols(expr opt.Expr) opt.ColSet {
+	var p props.Shared
+	memo.BuildSharedProps(expr, &p)
+	return p.OuterCols
+}
+
+// MakeSetPrivateForUnionSelects constructs a new SetPrivate whose LeftCols,
+// RightCols, and OutCols are the Cols of left, right, and out as ColLists.
+func (c *CustomFuncs) MakeSetPrivateForUnionSelects(
+	left, right, out *memo.ScanPrivate,
+) *memo.SetPrivate {
+	return &memo.SetPrivate{
+		LeftCols:  opt.ColSetToList(left.Cols),
+		RightCols: opt.ColSetToList(right.Cols),
+		OutCols:   opt.ColSetToList(out.Cols),
+	}
+}
+
 // scanIndexIter is a helper struct that supports iteration over the indexes
 // of a Scan operator table. For example:
 //

diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt
@@ -12,8 +12,8 @@
 # and examples.
 [GenerateConstrainedScans, Explore]
 (Select
-  (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate))
-  $filters:*
+    (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate))
+    $filters:*
 )
 =>
 (GenerateConstrainedScans $scanPrivate $filters)
@@ -22,8 +22,73 @@
 # be serviced by an inverted index.
 [GenerateInvertedIndexScans, Explore]
 (Select
-  (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate))
-  $filters:*
+    (Scan $scanPrivate:* & (IsCanonicalScan $scanPrivate) & (HasInvertedIndexes $scanPrivate))
+    $filters:*
 )
 =>
 (GenerateInvertedIndexScans $scanPrivate $filters)
+
+# GenerateUnionSelects splits disjunctions (Or expressions) into a Union of two
+# Select expressions, the first containing the left sub-expression of the Or
+# expression and the second containing the right sub-expression. All other
+# filter items in the original expression are preserved in the new Select
+# expressions.
+#
+# This can produce better query plans in cases where indexes cover both sides of
+# the Or expression. The execution plan can use both indexes to satisfy both
+# sides of the disjunction and union the results together. The rule does not
+# match an Or expression whose two sides have equal sets of outer columns.
+#
+# Note that this rule only matches Selects with canonical scans. Therefore scan
+# constraints need not be duplicated in the left and right scans of the union.
+#
+# Also note that this rule only matches Selects whose input Scan has a strict
+# key. This is required to prevent the generated Union from de-duplicating rows
+# that have the same selected values. For example, consider the following:
+#
+#     CREATE TABLE t (k INT PRIMARY KEY, a INT, b INT)
+#     INSERT INTO t VALUES (1, 1, 3)
+#     INSERT INTO t VALUES (2, 1, 3)
+#
+# The expected result of the following Select query is 2 rows, each with values
+# (1, 3).
+#
+#     SELECT a, b FROM t WHERE a = 1 OR b = 3
+#
+# However, Union de-duplicates all tuples with the same set of values. So, the
+# query below returns only a single row.
+#
+#     SELECT a, b FROM t WHERE a = 1
+#     UNION
+#     SELECT a, b FROM t WHERE b = 3
+#
+# With a key in the output columns, each input row to the Union is guaranteed to
+# be unique, and therefore will not be incorrectly de-duplicated.
+[GenerateUnionSelects, Explore]
+(Select
+    $input:(Scan
+        $scanPrivate:* & (IsCanonicalScan $scanPrivate)
+    ) & (HasStrictKey $input)
+    $filters:[
+        ...
+        $item:(FiltersItem (Or $left:* $right:*)) &
+            ^(ColsAreEqual (ExprOuterCols $left) (ExprOuterCols $right))
+        ...
+    ]
+)
+=>
+(Union
+    (Select
+        $input
+        (ReplaceFiltersItem $filters $item $left)
+    )
+    (Select
+        (Scan $rightScan:(DuplicateScanPrivate $scanPrivate))
+        (MapScanFilterCols
+            (ReplaceFiltersItem $filters $item $right)
+            $scanPrivate
+            $rightScan
+        )
+    )
+    (MakeSetPrivateForUnionSelects $scanPrivate $rightScan $scanPrivate)
+)