Skip to content

Commit

Permalink
opt: do not cross-join input of inverted semi-join
Browse files Browse the repository at this point in the history
In #78685, we prevented `GenerateLookupJoins` from incorrect creating a
cross-join on the input of a semi-join, addressing #78681. This commit
address the same issue with `GenerateInvertedJoins`, which we originally
forgot to fix.

Informs #78681

Release note (bug fix): A bug has been fixed which caused the optimizer
to generate invalid query plans which could result in incorrect query
results. The bug, which has been present since version 21.1.0, can
appear if all of the following conditions are true:
  1. The query contains a semi-join, such as queries in the form
     `SELECT * FROM a WHERE EXISTS (SELECT * FROM b WHERE a.a @> b.b)`.
  2. The inner table has a multi-column inverted index containing the
     inverted column in the filter.
  3. The index prefix columns are constrained to a set of values via the
     filter or a `CHECK` constraint, e.g., with an `IN` operator. In the
     case of a `CHECK` constraint, the column is `NOT NULL`.
  • Loading branch information
mgartner committed Apr 5, 2022
1 parent 3bf035f commit e1c9619
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 38 deletions.
14 changes: 12 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_multi_column
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,8 @@ ORDER BY (lk, rk)
5 12
5 16

# Regression test for #59615. Ensure that invalid inverted joins are not created
# for left and anti joins.
# Regression test for #59615 and #78681. Ensure that invalid inverted joins are
# not created for left, semi, and anti joins.
statement ok
CREATE TABLE t59615_inv (
x INT NOT NULL CHECK (x in (1, 3)),
Expand All @@ -517,3 +517,13 @@ SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE NOT EXISTS (
----
"a"
"b"

statement ok
INSERT INTO t59615_inv VALUES (1, '"a"'::JSONB), (3, '"a"'::JSONB)

query T rowsort
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
)
----
"a"
16 changes: 10 additions & 6 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
// because constructing a cross join with foundVals will
// increase the size of the input. As a result, non-matching
// input rows will show up more than once in the output,
// which is incorrect (see #59615 and #78685).
// which is incorrect (see #59615 and #78681).
shouldBuildMultiSpanLookupJoin = true
break
}
Expand Down Expand Up @@ -1032,11 +1032,15 @@ func (c *CustomFuncs) GenerateInvertedJoins(
return
}

if len(foundVals) > 1 && (joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create an inverted join in this case, because constructing
// a cross join with foundVals will increase the size of the input. As a
// result, non-matching input rows will show up more than once in the
// output, which is incorrect (see #59615).
if len(foundVals) > 1 &&
(joinType == opt.LeftJoinOp || joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create an inverted join in this case, because
// constructing a cross join with foundVals will increase the
// size of the input. As a result, matching input rows will show
// up more than once in the output of a semi-join, and
// non-matching input rows will show up more than once in the
// output of a left or anti join, which is incorrect (see #59615
// and #78681).
// TODO(rytaft,mgartner): find a way to create an inverted join for this
// case.
return
Expand Down
132 changes: 102 additions & 30 deletions pkg/sql/opt/xform/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -8873,38 +8873,53 @@ WHERE EXISTS (
SELECT * FROM json_arr1 AS t1 WHERE t1.j @> t2.j AND t1.i IN (3, 4)
)
----
semi-join (lookup json_arr1 [as=t1])
project
├── columns: k:1!null l:2 j:3 a:4
├── key columns: [22] = [7]
├── lookup columns are key
├── second join in paired joiner
├── immutable
├── key: (1)
├── fd: (1)-->(2-4)
├── inner-join (inverted json_arr1@j_idx [as=t1])
│ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 t1.k:22!null i:23!null continuation:36
│ ├── prefix key columns: [21] = [23]
│ ├── first join in paired joiner; continuation column: continuation:36
│ ├── inverted-expr
│ │ └── t1.j:24 @> t2.j:3
│ ├── fd: (1)-->(2-4), (22)-->(23,36)
│ ├── inner-join (cross)
│ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 "inverted_join_const_col_@8":21!null
│ │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more)
│ │ ├── fd: (1)-->(2-4)
│ │ ├── scan json_arr2 [as=t2]
│ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
│ │ │ ├── key: (1)
│ │ │ └── fd: (1)-->(2-4)
│ │ ├── values
│ │ │ ├── columns: "inverted_join_const_col_@8":21!null
│ │ │ ├── cardinality: [2 - 2]
│ │ │ ├── (3,)
│ │ │ └── (4,)
│ │ └── filters (true)
│ └── filters (true)
└── filters
└── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]
└── distinct-on
├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
├── grouping columns: t2.k:1!null
├── immutable
├── key: (1)
├── fd: (1)-->(2-4)
├── inner-join (lookup json_arr1 [as=t1])
│ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 i:8!null t1.j:9
│ ├── key columns: [22] = [7]
│ ├── lookup columns are key
│ ├── immutable
│ ├── fd: (1)-->(2-4)
│ ├── inner-join (inverted json_arr1@j_idx [as=t1])
│ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 t1.k:22!null i:23!null
│ │ ├── prefix key columns: [21] = [23]
│ │ ├── inverted-expr
│ │ │ └── t1.j:24 @> t2.j:3
│ │ ├── fd: (1)-->(2-4), (22)-->(23)
│ │ ├── inner-join (cross)
│ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 "inverted_join_const_col_@8":21!null
│ │ │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more)
│ │ │ ├── fd: (1)-->(2-4)
│ │ │ ├── scan json_arr2 [as=t2]
│ │ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
│ │ │ │ ├── key: (1)
│ │ │ │ └── fd: (1)-->(2-4)
│ │ │ ├── values
│ │ │ │ ├── columns: "inverted_join_const_col_@8":21!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── (3,)
│ │ │ │ └── (4,)
│ │ │ └── filters (true)
│ │ └── filters (true)
│ └── filters
│ └── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]
└── aggregations
├── const-agg [as=l:2, outer=(2)]
│ └── l:2
├── const-agg [as=t2.j:3, outer=(3)]
│ └── t2.j:3
└── const-agg [as=t2.a:4, outer=(4)]
└── t2.a:4

# Generate an inverted semi-join on a multi-column inverted index with the
# prefix column constrained by an equality constraint.
Expand Down Expand Up @@ -8996,8 +9011,8 @@ anti-join (lookup json_arr1 [as=t1])
└── filters
└── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]

# Regression test for #59615. Ensure that invalid inverted joins are not created
# for left and anti joins.
# Regression test for #59615 and #78681. Ensure that invalid inverted joins are
# not created for left, semi, and anti joins.
exec-ddl
CREATE TABLE t59615_inv (
x INT NOT NULL CHECK (x in (1, 3)),
Expand Down Expand Up @@ -9026,6 +9041,63 @@ right-join (cross)
└── filters
└── y:3 @> column1:1 [outer=(1,3), immutable]

# GenerateInvertedJoins fires in this case because the semi join is converted to
# an inner join by ConvertSemiToInnerJoin.
opt
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
)
----
project
├── columns: y:1!null
├── cardinality: [0 - 2]
├── immutable
└── distinct-on
├── columns: column1:1!null rownum:9!null
├── grouping columns: rownum:9!null
├── immutable
├── key: (9)
├── fd: (9)-->(1)
├── inner-join (lookup t59615_inv [as=t])
│ ├── columns: column1:1!null y:3 rownum:9!null
│ ├── key columns: [14] = [5]
│ ├── lookup columns are key
│ ├── immutable
│ ├── fd: (9)-->(1)
│ ├── inner-join (inverted t59615_inv@t59615_inv_x_y_idx [as=t])
│ │ ├── columns: column1:1!null rownum:9!null x:11!null rowid:14!null
│ │ ├── prefix key columns: [10] = [11]
│ │ ├── inverted-expr
│ │ │ └── y:12 @> column1:1
│ │ ├── fd: (9)-->(1), (14)-->(11)
│ │ ├── inner-join (cross)
│ │ │ ├── columns: column1:1!null rownum:9!null "inverted_join_const_col_@2":10!null
│ │ │ ├── cardinality: [4 - 4]
│ │ │ ├── multiplicity: left-rows(one-or-more), right-rows(one-or-more)
│ │ │ ├── fd: (9)-->(1)
│ │ │ ├── ordinality
│ │ │ │ ├── columns: column1:1!null rownum:9!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── key: (9)
│ │ │ │ ├── fd: (9)-->(1)
│ │ │ │ └── values
│ │ │ │ ├── columns: column1:1!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── ('"a"',)
│ │ │ │ └── ('"b"',)
│ │ │ ├── values
│ │ │ │ ├── columns: "inverted_join_const_col_@2":10!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── (1,)
│ │ │ │ └── (3,)
│ │ │ └── filters (true)
│ │ └── filters (true)
│ └── filters
│ └── y:3 @> column1:1 [outer=(1,3), immutable]
└── aggregations
└── const-agg [as=column1:1, outer=(1)]
└── column1:1

opt expect-not=GenerateInvertedJoins
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE NOT EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
Expand Down

0 comments on commit e1c9619

Please sign in to comment.