Skip to content

Commit

Permalink
opt: fix bug with incorrect results returned by left lookup/inverted …
Browse files Browse the repository at this point in the history
…join

This commit fixes a bug in which incorrect results could be returned for left
and anti lookup and inverted joins. This could happen when one of the prefix
columns was constrained to more than one constant value, either due to a check
constraint or an IN clause. The current implementation used to support lookup
and inverted joins for this case creates a cross-join between the constant
values and the input, thus artifically increasing the size of the input to the
join. This works for inner and semi joins, but it is not correct for left or
anti joins, because non-matching input rows will be returned multiple times in
the output, which is incorrect.

This commit fixes the bug by avoiding creating lookup or inverted joins in
these cases. However, this is suboptimal for performance, and in a future
commit we should try to find a way to create left/anti inverted/lookup joins
safely for these cases.

Fixes #59615

Release note (bug fix): Fixed a bug in which incorrect results could be
returned for left and anti joins. This could happen when one of the columns on
one side of the join was constrained to multiple constant values, either due to
a check constraint or an IN clause. The bug resulted in non-matching input rows
getting returned multiple times in the output, which is incorrect. This bug
only affected previous alpha releases of 21.1, and has now been fixed.
  • Loading branch information
rytaft committed Feb 1, 2021
1 parent e3158f9 commit 11a600b
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 112 deletions.
24 changes: 24 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/inverted_join_multi_column
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,27 @@ ORDER BY (lk, rk)
3 16
5 12
5 16

# Regression test for #59615. Ensure that invalid inverted joins are not created
# for left and anti joins.
statement ok
CREATE TABLE t59615_inv (
x INT NOT NULL CHECK (x in (1, 3)),
y JSON,
z INT,
INVERTED INDEX (x, y)
)

query TITI
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) LEFT JOIN t59615_inv t ON t.y @> u.y
----
"a" NULL NULL NULL
"b" NULL NULL NULL

query T
SELECT * FROM (VALUES ('"a"'::jsonb), ('"b"'::jsonb)) AS u(y) WHERE NOT EXISTS (
SELECT * FROM t59615_inv t WHERE t.y @> u.y
)
----
"a"
"b"
25 changes: 25 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -588,3 +588,28 @@ CREATE TABLE tab4 (
query I
SELECT pk FROM tab4 WHERE col0 IN (SELECT col3 FROM tab4 WHERE col4 = 495.6) AND (col3 IS NULL)
----

# Regression test for #59615. Ensure that invalid lookup joins are not created
# for left and anti joins.
statement ok
CREATE TABLE t59615 (
x INT NOT NULL CHECK (x in (1, 3)),
y INT NOT NULL,
z INT,
PRIMARY KEY (x, y)
)

# We cannot create a lookup join in this case.
query IIII
SELECT * FROM (VALUES (1), (2)) AS u(y) LEFT JOIN t59615 t ON u.y = t.y
----
1 NULL NULL NULL
2 NULL NULL NULL

query I
SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE NOT EXISTS (
SELECT * FROM t59615 t WHERE u.y = t.y
)
----
1
2
146 changes: 60 additions & 86 deletions pkg/sql/opt/exec/execbuilder/testdata/unique
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,8 @@ vectorized: true
label: buffer 1

# Test that we use the index when available for the ON CONFLICT checks.
# TODO(rytaft): This plan is suboptimal because it's not safe to construct
# a left lookup join in this case (see #59649).
query T
EXPLAIN (VERBOSE) INSERT INTO uniq_enum VALUES ('us-west', 'foo', 1, 1), ('us-east', 'bar', 2, 2)
ON CONFLICT DO NOTHING
Expand Down Expand Up @@ -835,73 +837,53 @@ vectorized: true
│ │ render column3: column3
│ │ render column4: column4
│ │
│ └── • project
│ └── • hash join (right anti)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ equality: (i) = (column3)
│ │
│ ├── • scan
│ │ columns: (i)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_enum@primary
│ │ spans: FULL SCAN
│ │
│ └── • lookup join (anti)
│ │ columns: ("lookup_join_const_col_@29", column1, column2, column3, column4)
│ │ table: uniq_enum@uniq_enum_r_s_j_key
│ │ equality: (lookup_join_const_col_@29, column2, column4) = (r,s,j)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_enum@primary
│ │ equality: (column1, column3) = (r,i)
│ │ equality cols are key
│ │
│ └── • cross join (inner)
│ │ columns: ("lookup_join_const_col_@29", column1, column2, column3, column4)
│ └── • lookup join (anti)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_enum@uniq_enum_r_s_j_key
│ │ equality: (column1, column2, column4) = (r,s,j)
│ │ equality cols are key
│ │
│ ├── • values
│ │ columns: ("lookup_join_const_col_@29")
│ │ size: 1 column, 3 rows
│ │ row 0, expr 0: 'us-east'
│ │ row 1, expr 0: 'us-west'
│ │ row 2, expr 0: 'eu-west'
│ │
│ └── • project
│ └── • hash join (right anti)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ equality: (s, j) = (column2, column4)
│ │
│ └── • lookup join (anti)
│ │ columns: ("lookup_join_const_col_@23", column1, column2, column3, column4)
│ │ table: uniq_enum@primary
│ │ equality: (lookup_join_const_col_@23, column3) = (r,i)
│ │ equality cols are key
│ │
│ └── • cross join (inner)
│ │ columns: ("lookup_join_const_col_@23", column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │
│ ├── • values
│ │ columns: ("lookup_join_const_col_@23")
│ │ size: 1 column, 3 rows
│ │ row 0, expr 0: 'us-east'
│ │ row 1, expr 0: 'us-west'
│ │ row 2, expr 0: 'eu-west'
│ │
│ └── • lookup join (anti)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_enum@primary
│ │ equality: (column1, column3) = (r,i)
│ │ equality cols are key
│ │
│ └── • lookup join (anti)
│ │ columns: (column1, column2, column3, column4)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_enum@uniq_enum_r_s_j_key
│ │ equality: (column1, column2, column4) = (r,s,j)
│ │ equality cols are key
│ │
│ └── • values
│ columns: (column1, column2, column3, column4)
│ size: 4 columns, 2 rows
│ row 0, expr 0: 'us-west'
│ row 0, expr 1: 'foo'
│ row 0, expr 2: 1
│ row 0, expr 3: 1
│ row 1, expr 0: 'us-east'
│ row 1, expr 1: 'bar'
│ row 1, expr 2: 2
│ row 1, expr 3: 2
│ ├── • scan
│ │ columns: (s, j)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_enum@primary
│ │ spans: FULL SCAN
│ │
│ └── • values
│ columns: (column1, column2, column3, column4)
│ size: 4 columns, 2 rows
│ row 0, expr 0: 'us-west'
│ row 0, expr 1: 'foo'
│ row 0, expr 2: 1
│ row 0, expr 3: 1
│ row 1, expr 0: 'us-east'
│ row 1, expr 1: 'bar'
│ row 1, expr 2: 2
│ row 1, expr 3: 2
├── • constraint-check
│ │
Expand Down Expand Up @@ -2313,6 +2295,8 @@ vectorized: true
label: buffer 1

# Test that we use the index when available for the ON CONFLICT checks.
# TODO(rytaft): This plan is suboptimal because it's not safe to construct
# a left lookup join in this case (see #59649).
query T
EXPLAIN (VERBOSE) INSERT INTO uniq_enum VALUES ('us-west', 'foo', 1, 1), ('us-east', 'bar', 2, 2)
ON CONFLICT (s, j) DO UPDATE SET i = 3
Expand Down Expand Up @@ -2369,38 +2353,28 @@ vectorized: true
│ │ render i: i
│ │ render j: j
│ │
│ └── • project
│ │ columns: (column1, column2, column3, column4, r, s, i, j)
│ └── • hash join (right outer)
│ │ columns: (r, s, i, j, column1, column2, column3, column4)
│ │ estimated row count: 2 (missing stats)
│ │ equality: (s, j) = (column2, column4)
│ │
│ └── • lookup join (left outer)
│ │ columns: ("lookup_join_const_col_@11", column1, column2, column3, column4, r, s, i, j)
│ │ table: uniq_enum@uniq_enum_r_s_j_key
│ │ equality: (lookup_join_const_col_@11, column2, column4) = (r,s,j)
│ │ equality cols are key
│ │
│ └── • cross join (inner)
│ │ columns: ("lookup_join_const_col_@11", column1, column2, column3, column4)
│ │ estimated row count: 6
│ │
│ ├── • values
│ │ columns: ("lookup_join_const_col_@11")
│ │ size: 1 column, 3 rows
│ │ row 0, expr 0: 'us-east'
│ │ row 1, expr 0: 'us-west'
│ │ row 2, expr 0: 'eu-west'
│ │
│ └── • values
│ columns: (column1, column2, column3, column4)
│ size: 4 columns, 2 rows
│ row 0, expr 0: 'us-west'
│ row 0, expr 1: 'foo'
│ row 0, expr 2: 1
│ row 0, expr 3: 1
│ row 1, expr 0: 'us-east'
│ row 1, expr 1: 'bar'
│ row 1, expr 2: 2
│ row 1, expr 3: 2
│ ├── • scan
│ │ columns: (r, s, i, j)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_enum@primary
│ │ spans: FULL SCAN
│ │
│ └── • values
│ columns: (column1, column2, column3, column4)
│ size: 4 columns, 2 rows
│ row 0, expr 0: 'us-west'
│ row 0, expr 1: 'foo'
│ row 0, expr 2: 1
│ row 0, expr 3: 1
│ row 1, expr 0: 'us-east'
│ row 1, expr 1: 'bar'
│ row 1, expr 2: 2
│ row 1, expr 3: 2
├── • constraint-check
│ │
Expand Down
20 changes: 20 additions & 0 deletions pkg/sql/opt/xform/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@ func (c *CustomFuncs) GenerateLookupJoins(
break
}

if len(foundVals) > 1 && (joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create a lookup join in this case, because constructing a
// cross join with foundVals will increase the size of the input. As a
// result, non-matching input rows will show up more than once in the
// output, which is incorrect (see #59615).
// TODO(rytaft,mgartner): find a way to create a lookup join for this
// case.
return
}

// We will join these constant values with the input to make
// equality columns for the lookup join.
if constFilters == nil {
Expand Down Expand Up @@ -531,6 +541,16 @@ func (c *CustomFuncs) GenerateInvertedJoins(
return
}

if len(foundVals) > 1 && (joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp) {
// We cannot create an inverted join in this case, because constructing
// a cross join with foundVals will increase the size of the input. As a
// result, non-matching input rows will show up more than once in the
// output, which is incorrect (see #59615).
// TODO(rytaft,mgartner): find a way to create an inverted join for this
// case.
return
}

// We will join these constant values with the input to make
// equality columns for the inverted join.
if constFilters == nil {
Expand Down
Loading

0 comments on commit 11a600b

Please sign in to comment.