Skip to content

Commit

Permalink
opt: do not cross-join input of inverted semi-join
Browse files Browse the repository at this point in the history
In cockroachdb#78685, we prevented `GenerateLookupJoins` from incorrect creating a
cross-join on the input of a semi-join, addressing cockroachdb#78681. This commit
addresses the same issue with `GenerateInvertedJoins`, which we
originally forgot to fix.

Informs cockroachdb#78681

Release note (bug fix): A bug has been fixed which caused the optimizer
to generate invalid query plans which could result in incorrect query
results. The bug, which has been present since version 21.1.0, can
appear if all of the following conditions are true:
  1. The query contains a semi-join, such as queries in the form
     `SELECT * FROM a WHERE EXISTS (SELECT * FROM b WHERE a.a @> b.b)`.
  2. The inner table has a multi-column inverted index containing the
     inverted column in the filter.
  3. The index prefix columns are constrained to a set of values via the
     filter or a `CHECK` constraint, e.g., with an `IN` operator. In the
     case of a `CHECK` constraint, the column is `NOT NULL`.
  • Loading branch information
mgartner committed Apr 6, 2022
1 parent df0d2cc commit 9c12570
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 38 deletions.
14 changes: 12 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_multi_column
Original file line number Diff line number Diff line change
Expand Up @@ -494,8 +494,8 @@ ORDER BY (lk, rk)
5 12
5 16

# Regression test for #59615. Ensure that invalid inverted joins are not created
# for left and anti joins.
# Regression test for #59615 and #78681. Ensure that invalid inverted joins are
# not created for left, semi, and anti joins.
statement ok
CREATE TABLE t59615_inv (
x INT NOT NULL CHECK (x in (1, 3)),
Expand All @@ -517,3 +517,13 @@ SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE NOT EXISTS (
----
"a"
"b"

statement ok
INSERT INTO t59615_inv VALUES (1, '"a"'::JSONB), (3, '"a"'::JSONB)

query T rowsort
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
)
----
"a"
16 changes: 10 additions & 6 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ func (c *CustomFuncs) generateLookupJoinsImpl(
// because constructing a cross join with foundVals will
// increase the size of the input. As a result, non-matching
// input rows will show up more than once in the output,
// which is incorrect (see #59615 and #78685).
// which is incorrect (see #59615 and #78681).
shouldBuildMultiSpanLookupJoin = true
break
}
Expand Down Expand Up @@ -1032,11 +1032,15 @@ func (c *CustomFuncs) GenerateInvertedJoins(
return
}

if len(foundVals) > 1 && (joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create an inverted join in this case, because constructing
// a cross join with foundVals will increase the size of the input. As a
// result, non-matching input rows will show up more than once in the
// output, which is incorrect (see #59615).
if len(foundVals) > 1 &&
(joinType == opt.LeftJoinOp || joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create an inverted join in this case, because
// constructing a cross join with foundVals will increase the
// size of the input. As a result, matching input rows will show
// up more than once in the output of a semi-join, and
// non-matching input rows will show up more than once in the
// output of a left or anti join, which is incorrect (see #59615
// and #78681).
// TODO(rytaft,mgartner): find a way to create an inverted join for this
// case.
return
Expand Down
100 changes: 70 additions & 30 deletions pkg/sql/opt/xform/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -8827,38 +8827,53 @@ WHERE EXISTS (
SELECT * FROM json_arr1 AS t1 WHERE t1.j @> t2.j AND t1.i IN (3, 4)
)
----
semi-join (lookup json_arr1 [as=t1])
project
├── columns: k:1!null l:2 j:3 a:4
├── key columns: [22] = [7]
├── lookup columns are key
├── second join in paired joiner
├── immutable
├── key: (1)
├── fd: (1)-->(2-4)
├── inner-join (inverted json_arr1@j_idx [as=t1])
│ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 t1.k:22!null i:23!null continuation:36
│ ├── prefix key columns: [21] = [23]
│ ├── first join in paired joiner; continuation column: continuation:36
│ ├── inverted-expr
│ │ └── t1.j:24 @> t2.j:3
│ ├── fd: (1)-->(2-4), (22)-->(23,36)
│ ├── inner-join (cross)
│ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 "inverted_join_const_col_@8":21!null
│ │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more)
│ │ ├── fd: (1)-->(2-4)
│ │ ├── scan json_arr2 [as=t2]
│ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
│ │ │ ├── key: (1)
│ │ │ └── fd: (1)-->(2-4)
│ │ ├── values
│ │ │ ├── columns: "inverted_join_const_col_@8":21!null
│ │ │ ├── cardinality: [2 - 2]
│ │ │ ├── (3,)
│ │ │ └── (4,)
│ │ └── filters (true)
│ └── filters (true)
└── filters
└── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]
└── distinct-on
├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
├── grouping columns: t2.k:1!null
├── immutable
├── key: (1)
├── fd: (1)-->(2-4)
├── inner-join (lookup json_arr1 [as=t1])
│ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 i:8!null t1.j:9
│ ├── key columns: [22] = [7]
│ ├── lookup columns are key
│ ├── immutable
│ ├── fd: (1)-->(2-4)
│ ├── inner-join (inverted json_arr1@j_idx [as=t1])
│ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 t1.k:22!null i:23!null
│ │ ├── prefix key columns: [21] = [23]
│ │ ├── inverted-expr
│ │ │ └── t1.j:24 @> t2.j:3
│ │ ├── fd: (1)-->(2-4), (22)-->(23)
│ │ ├── inner-join (cross)
│ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4 "inverted_join_const_col_@8":21!null
│ │ │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more)
│ │ │ ├── fd: (1)-->(2-4)
│ │ │ ├── scan json_arr2 [as=t2]
│ │ │ │ ├── columns: t2.k:1!null l:2 t2.j:3 t2.a:4
│ │ │ │ ├── key: (1)
│ │ │ │ └── fd: (1)-->(2-4)
│ │ │ ├── values
│ │ │ │ ├── columns: "inverted_join_const_col_@8":21!null
│ │ │ │ ├── cardinality: [2 - 2]
│ │ │ │ ├── (3,)
│ │ │ │ └── (4,)
│ │ │ └── filters (true)
│ │ └── filters (true)
│ └── filters
│ └── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]
└── aggregations
├── const-agg [as=l:2, outer=(2)]
│ └── l:2
├── const-agg [as=t2.j:3, outer=(3)]
│ └── t2.j:3
└── const-agg [as=t2.a:4, outer=(4)]
└── t2.a:4

# Generate an inverted semi-join on a multi-column inverted index with the
# prefix column constrained by an equality constraint.
Expand Down Expand Up @@ -8950,8 +8965,8 @@ anti-join (lookup json_arr1 [as=t1])
└── filters
└── t1.j:9 @> t2.j:3 [outer=(3,9), immutable]

# Regression test for #59615. Ensure that invalid inverted joins are not created
# for left and anti joins.
# Regression test for #59615 and #78681. Ensure that invalid inverted joins are
# not created for left, semi, and anti joins.
exec-ddl
CREATE TABLE t59615_inv (
x INT NOT NULL CHECK (x in (1, 3)),
Expand Down Expand Up @@ -8980,6 +8995,31 @@ right-join (cross)
└── filters
└── y:3 @> column1:1 [outer=(1,3), immutable]

# Disable ConvertSemiToInnerJoin to prevent GenerateInvertedJoins from firing
# for the converted inner join. With the expect-not option, we get added
# assurance that GenerateInvertedJoins is not incorrectly firing for the
# semi-join.
opt disable=ConvertSemiToInnerJoin expect-not=GenerateInvertedJoins
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
)
----
semi-join (cross)
├── columns: y:1!null
├── cardinality: [0 - 2]
├── immutable
├── values
│ ├── columns: column1:1!null
│ ├── cardinality: [2 - 2]
│ ├── ('"a"',)
│ └── ('"b"',)
├── scan t59615_inv [as=t]
│ ├── columns: y:3
│ └── check constraint expressions
│ └── x:2 IN (1, 3) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3]; tight)]
└── filters
└── y:3 @> column1:1 [outer=(1,3), immutable]

opt expect-not=GenerateInvertedJoins
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE NOT EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
Expand Down

0 comments on commit 9c12570

Please sign in to comment.