From 7af0002e7c2e252eab1d9f21e82804d433bb1ddb Mon Sep 17 00:00:00 2001 From: Marcus Gartner Date: Thu, 22 Sep 2022 13:55:14 -0400 Subject: [PATCH] opt: do not plan unnecessary paired semi- and anti- lookup joins This commit fixes an issue where the optimizer would plan a paired semi or anti lookup join in cases when a single lookup join would suffice. This only occurred in rare cases when the join filter contained a tautology or contradiction that could not be normalized to true or false in the canonical query plan, but could be eliminated from the filters when building a lookup join. If the tautology or contradiction referenced a column not covered by the lookup index, the optimizer mistakenly assumed that the index was not covering and planned a paired join. Now the optimizer can recognize that the index is actually covering, because the referenced column is not needed to evaluate the filters, and a single lookup join is planned. Fixes #87306 Release note (performance improvement): The optimizer now explores plans with a single lookup join expressions in rare cases where it previously planned two lookup join expressions. --- pkg/sql/opt/lookupjoin/testdata/key_cols | 9 ++++++ pkg/sql/opt/xform/join_funcs.go | 37 +++++++++++++++++++----- pkg/sql/opt/xform/testdata/rules/join | 28 ++++++++++++++++++ 3 files changed, 66 insertions(+), 8 deletions(-) diff --git a/pkg/sql/opt/lookupjoin/testdata/key_cols b/pkg/sql/opt/lookupjoin/testdata/key_cols index bf4b8aecd6ce..9d2baee87214 100644 --- a/pkg/sql/opt/lookupjoin/testdata/key_cols +++ b/pkg/sql/opt/lookupjoin/testdata/key_cols @@ -156,3 +156,12 @@ key cols: y = b input projections: lookup_join_const_col_@7 = 1 + +lookup-constraints left=(a int) right=(x int, y int, z int) index=(x, z) +a = z AND (x = 0 OR y IN (0) AND y > 0) +---- +key cols: + x = lookup_join_const_col_@5 + z = a +input projections: + lookup_join_const_col_@5 = 0 diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 2cd6d6f13e08..f56b67b4d045 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -436,7 +436,8 @@ func (c *CustomFuncs) generateLookupJoinsImpl( tableFDs := memo.MakeTableFuncDep(md, scanPrivate.Table) // A lookup join will drop any input row which contains NULLs, so a lax key // is sufficient. - lookupJoin.LookupColsAreTableKey = tableFDs.ColsAreLaxKey(lookupConstraint.RightSideCols.ToSet()) + rightKeyCols := lookupConstraint.RightSideCols.ToSet() + lookupJoin.LookupColsAreTableKey = tableFDs.ColsAreLaxKey(rightKeyCols) // Add input columns and lookup expression columns, since these will be // needed for all join types and cases. Exclude synthesized projection @@ -493,13 +494,14 @@ func (c *CustomFuncs) generateLookupJoinsImpl( return } - _, isPartial := index.Predicate() - if isPartial && (joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp) { + if joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp { // Typically, the index must cover all columns from the right in // order to generate a lookup join without an additional index join - // (case 1, see function comment). However, if the index is a - // partial index, the filters remaining after proving - // filter-predicate implication may no longer reference some + // (case 1, see function comment). However, there are some cases + // where the remaining filters no longer reference some columns. + // + // 1. If the index is a partial index, the filters remaining after + // proving filter-predicate implication may no longer reference some // columns. A lookup semi- or anti-join can be generated if the // columns in the new filters from the right side of the join are // covered by the index. Consider the example: @@ -515,13 +517,32 @@ func (c *CustomFuncs) generateLookupJoinsImpl( // Column y is no longer referenced, so a lookup semi-join can be // created despite the partial index not covering y. // - // Note that this is a special case that only works for semi- and + // 2. If onFilters contain a contradiction or tautology that is not + // normalized away, then columns may no longer be referenced in the + // remaining filters of the lookup join. Consider the example: + // + // CREATE TABLE a (a INT) + // CREATE TABLE xyz (x INT, y INT, z INT, INDEX (x, z)) + // + // SELECT a FROM a WHERE a IN ( + // SELECT z FROM xyz WHERE x = 0 OR y IN (0) AND y > 0 + // ) + // + // The filter x = 0 OR y IN (0) AND y > 0 contains a contradiction + // that currently is not normalized to false, but a tight constraint + // is created for entire filter that constrains x to 0. Because the + // filter is tight, there is no remaining filter. Column y is no + // longer referenced, so a lookup semi-join can be created despite + // the secondary index not covering y. + // + // Note that these are special cases that only work for semi- and // anti-joins because they never include columns from the right side // in their output columns. Other joins include columns from the // right side in their output columns, so even if the ON filters no // longer reference an un-covered column, they must be fetched (case // 2, see function comment). - filterColsFromRight := rightCols.Intersection(onFilters.OuterCols()) + remainingFilterCols := rightKeyCols.Union(lookupJoin.On.OuterCols()) + filterColsFromRight := rightCols.Intersection(remainingFilterCols) if filterColsFromRight.SubsetOf(indexCols) { lookupJoin.Cols.UnionWith(filterColsFromRight) c.e.mem.AddLookupJoinToGroup(&lookupJoin, grp) diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 4e04cb6887ea..f80d3f101eb8 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -12077,6 +12077,34 @@ project └── projections └── NULL [as="?column?":11] +# Regression test for #87306. Do not plan a paired join if there are no columns +# to fetch from the RHS's primary index. +exec-ddl +CREATE TABLE t87306 ( + a INT, + b INT, + c INT, + INDEX (a, c) +) +---- + +opt expect=GenerateLookupJoinsWithFilter +SELECT m FROM small WHERE m IN ( + SELECT c FROM t87306 WHERE a = 0 OR b IN (0) AND b > 0 +) +---- +semi-join (lookup t87306@t87306_a_c_idx) + ├── columns: m:1 + ├── key columns: [12 1] = [6 8] + ├── project + │ ├── columns: "lookup_join_const_col_@6":12!null m:1 + │ ├── fd: ()-->(12) + │ ├── scan small + │ │ └── columns: m:1 + │ └── projections + │ └── 0 [as="lookup_join_const_col_@6":12] + └── filters (true) + # -------------------------------------------------- # SplitDisjunctionOfJoinTerms # --------------------------------------------------