diff --git a/pkg/sql/logictest/testdata/logic_test/lookup_join b/pkg/sql/logictest/testdata/logic_test/lookup_join index 6a620cc99b38..ab5641165ee9 100644 --- a/pkg/sql/logictest/testdata/logic_test/lookup_join +++ b/pkg/sql/logictest/testdata/logic_test/lookup_join @@ -575,6 +575,36 @@ SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE NOT EXISTS ( 1 2 +# Regression test for #78681. Ensure that invalid lookup joins are not created +# for semi joins. +statement ok +CREATE TABLE t78681 ( + x INT NOT NULL CHECK (x in (1, 3)), + y INT NOT NULL, + PRIMARY KEY (x, y) +) + +# Insert stats so that a lookup semi-join is selected. +statement ok +ALTER TABLE t78681 INJECT STATISTICS '[ + { + "columns": ["x"], + "created_at": "2018-05-01 1:00:00.00000+00:00", + "row_count": 10000000, + "distinct_count": 2 + } +]' + +statement ok +INSERT INTO t78681 VALUES (1, 1), (3, 1) + +query I rowsort +SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE EXISTS ( + SELECT * FROM t78681 t WHERE u.y = t.y +) +---- +1 + statement ok CREATE TABLE lookup_expr ( r STRING NOT NULL CHECK (r IN ('east', 'west')), diff --git a/pkg/sql/logictest/testdata/logic_test/unique b/pkg/sql/logictest/testdata/logic_test/unique index 1e031350d784..6f73d601c2aa 100644 --- a/pkg/sql/logictest/testdata/logic_test/unique +++ b/pkg/sql/logictest/testdata/logic_test/unique @@ -274,7 +274,7 @@ INSERT INTO uniq_enum VALUES ('us-west', 'foo', 1, 1), ('eu-west', 'bar', 2, 2) # index, and the prefix of the index is an enum. This case uses the default # value for columns r and j. statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_i"\nDETAIL: Key \(i\)=\(1\) already exists\. 
-INSERT INTO uniq_enum (s, i) VALUES ('foo', 1), ('bar', 2) +INSERT INTO uniq_enum (s, i) VALUES ('foo', 1), ('bar', 3) query TTII colnames,rowsort SELECT * FROM uniq_enum diff --git a/pkg/sql/opt/exec/execbuilder/testdata/unique b/pkg/sql/opt/exec/execbuilder/testdata/unique index fd8c4986cefb..6454e1396fda 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/unique +++ b/pkg/sql/opt/exec/execbuilder/testdata/unique @@ -751,31 +751,47 @@ vectorized: true │ │ columns: (column1, column3) │ │ estimated row count: 1 (missing stats) │ │ -│ └── • lookup join (semi) -│ │ columns: ("lookup_join_const_col_@12", column1, column3) -│ │ table: uniq_enum@primary -│ │ equality: (lookup_join_const_col_@12, column3) = (r,i) -│ │ equality cols are key -│ │ pred: column1 != r +│ └── • distinct +│ │ columns: (column1, column3, rownum) +│ │ estimated row count: 2 (missing stats) +│ │ distinct on: rownum │ │ -│ └── • cross join (inner) -│ │ columns: ("lookup_join_const_col_@12", column1, column3) -│ │ estimated row count: 6 -│ │ -│ ├── • values -│ │ columns: ("lookup_join_const_col_@12") -│ │ size: 1 column, 3 rows -│ │ row 0, expr 0: 'us-east' -│ │ row 1, expr 0: 'us-west' -│ │ row 2, expr 0: 'eu-west' +│ └── • project +│ │ columns: (column1, column3, rownum) │ │ │ └── • project -│ │ columns: (column1, column3) -│ │ estimated row count: 2 +│ │ columns: (r, i, column1, column3, rownum) +│ │ estimated row count: 7 (missing stats) │ │ -│ └── • scan buffer -│ columns: (column1, column2, column3, column4, check1) -│ label: buffer 1 +│ └── • lookup join (inner) +│ │ columns: ("lookup_join_const_col_@12", column1, column3, rownum, r, i) +│ │ table: uniq_enum@primary +│ │ equality: (lookup_join_const_col_@12, column3) = (r,i) +│ │ equality cols are key +│ │ pred: column1 != r +│ │ +│ └── • cross join (inner) +│ │ columns: ("lookup_join_const_col_@12", column1, column3, rownum) +│ │ estimated row count: 6 +│ │ +│ ├── • values +│ │ columns: ("lookup_join_const_col_@12") +│ │ size: 1 column, 3 
rows +│ │ row 0, expr 0: 'us-east' +│ │ row 1, expr 0: 'us-west' +│ │ row 2, expr 0: 'eu-west' +│ │ +│ └── • ordinality +│ │ columns: (column1, column3, rownum) +│ │ estimated row count: 2 +│ │ +│ └── • project +│ │ columns: (column1, column3) +│ │ estimated row count: 2 +│ │ +│ └── • scan buffer +│ columns: (column1, column2, column3, column4, check1) +│ label: buffer 1 │ └── • constraint-check │ @@ -790,31 +806,47 @@ vectorized: true │ columns: (column1, column2, column3, column4) │ estimated row count: 1 (missing stats) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@22", column1, column2, column3, column4) - │ table: uniq_enum@uniq_enum_r_s_j_key - │ equality: (lookup_join_const_col_@22, column2, column4) = (r,s,j) - │ equality cols are key - │ pred: (column1 != r) OR (column3 != i) + └── • distinct + │ columns: (column1, column2, column3, column4, rownum) + │ estimated row count: 0 (missing stats) + │ distinct on: rownum │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@22", column1, column2, column3, column4) - │ estimated row count: 6 - │ - ├── • values - │ columns: ("lookup_join_const_col_@22") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (column1, column2, column3, column4, rownum) │ └── • project - │ columns: (column1, column2, column3, column4) - │ estimated row count: 2 + │ columns: (r, s, i, j, column1, column2, column3, column4, rownum) + │ estimated row count: 0 (missing stats) │ - └── • scan buffer - columns: (column1, column2, column3, column4, check1) - label: buffer 1 + └── • lookup join (inner) + │ columns: ("lookup_join_const_col_@22", column1, column2, column3, column4, rownum, r, s, i, j) + │ table: uniq_enum@uniq_enum_r_s_j_key + │ equality: (lookup_join_const_col_@22, column2, column4) = (r,s,j) + │ equality cols are key + │ pred: (column1 != r) OR (column3 != i) + │ + └── • cross join (inner) + │ columns: 
("lookup_join_const_col_@22", column1, column2, column3, column4, rownum) + │ estimated row count: 6 + │ + ├── • values + │ columns: ("lookup_join_const_col_@22") + │ size: 1 column, 3 rows + │ row 0, expr 0: 'us-east' + │ row 1, expr 0: 'us-west' + │ row 2, expr 0: 'eu-west' + │ + └── • ordinality + │ columns: (column1, column2, column3, column4, rownum) + │ estimated row count: 2 + │ + └── • project + │ columns: (column1, column2, column3, column4) + │ estimated row count: 2 + │ + └── • scan buffer + columns: (column1, column2, column3, column4, check1) + label: buffer 1 # Test that we use the index when available for the insert checks. This uses # the default value for columns r and j. @@ -874,31 +906,47 @@ vectorized: true │ columns: (r_default, column2) │ estimated row count: 1 (missing stats) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@12", r_default, column2) - │ table: uniq_enum@primary - │ equality: (lookup_join_const_col_@12, column2) = (r,i) - │ equality cols are key - │ pred: r_default != r + └── • distinct + │ columns: (r_default, column2, rownum) + │ estimated row count: 2 (missing stats) + │ distinct on: rownum │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@12", r_default, column2) - │ estimated row count: 6 - │ - ├── • values - │ columns: ("lookup_join_const_col_@12") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (r_default, column2, rownum) │ └── • project - │ columns: (r_default, column2) - │ estimated row count: 2 + │ columns: (r, i, r_default, column2, rownum) + │ estimated row count: 7 (missing stats) │ - └── • scan buffer - columns: (r_default, column1, column2, j_default, check1) - label: buffer 1 + └── • lookup join (inner) + │ columns: ("lookup_join_const_col_@12", r_default, column2, rownum, r, i) + │ table: uniq_enum@primary + │ equality: (lookup_join_const_col_@12, column2) = (r,i) + │ equality cols 
are key + │ pred: r_default != r + │ + └── • cross join (inner) + │ columns: ("lookup_join_const_col_@12", r_default, column2, rownum) + │ estimated row count: 6 + │ + ├── • values + │ columns: ("lookup_join_const_col_@12") + │ size: 1 column, 3 rows + │ row 0, expr 0: 'us-east' + │ row 1, expr 0: 'us-west' + │ row 2, expr 0: 'eu-west' + │ + └── • ordinality + │ columns: (r_default, column2, rownum) + │ estimated row count: 2 + │ + └── • project + │ columns: (r_default, column2) + │ estimated row count: 2 + │ + └── • scan buffer + columns: (r_default, column1, column2, j_default, check1) + label: buffer 1 # Test that we use the index when available for de-duplicating INSERT ON # CONFLICT DO NOTHING rows before inserting. @@ -1344,39 +1392,25 @@ vectorized: true │ columns: (column3) │ estimated row count: 1 │ - └── • project + └── • lookup join (semi) │ columns: (column1, column2, column3, column4) │ estimated row count: 1 + │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) + │ lookup condition: (column3 = b) AND (r IN ('us-east', 'us-west', 'eu-west')) + │ pred: (column1 != r) OR (column2 != a) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@13", column1, column2, column3, column4) - │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) - │ equality: (lookup_join_const_col_@13, column3) = (r,b) - │ pred: (column1 != r) OR (column2 != a) + └── • filter + │ columns: (column1, column2, column3, column4) + │ estimated row count: 2 + │ filter: column4 IN ('bar', 'baz', 'foo') │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@13", column1, column2, column3, column4) - │ estimated row count: 6 - │ - ├── • values - │ columns: ("lookup_join_const_col_@13") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (column1, column2, column3, column4) + │ estimated row count: 2 │ - └── • filter - │ columns: (column1, 
column2, column3, column4) - │ estimated row count: 2 - │ filter: column4 IN ('bar', 'baz', 'foo') - │ - └── • project - │ columns: (column1, column2, column3, column4) - │ estimated row count: 2 - │ - └── • scan buffer - columns: (column1, column2, column3, column4, check1, partial_index_put1) - label: buffer 1 + └── • scan buffer + columns: (column1, column2, column3, column4, check1, partial_index_put1) + label: buffer 1 # Test that we use the partial index when available for de-duplicating INSERT ON # CONFLICT DO NOTHING rows before inserting. @@ -2204,31 +2238,47 @@ vectorized: true │ │ columns: (r_new, i_new) │ │ estimated row count: 3 (missing stats) │ │ -│ └── • lookup join (semi) -│ │ columns: (r_new, i_new, "lookup_join_const_col_@17") -│ │ table: uniq_enum@primary -│ │ equality: (lookup_join_const_col_@17, i_new) = (r,i) -│ │ equality cols are key -│ │ pred: r_new != r +│ └── • distinct +│ │ columns: (r_new, i_new, rownum) +│ │ estimated row count: 9 (missing stats) +│ │ distinct on: rownum │ │ -│ └── • cross join (inner) -│ │ columns: (r_new, i_new, "lookup_join_const_col_@17") -│ │ estimated row count: 28 (missing stats) -│ │ -│ ├── • project -│ │ │ columns: (r_new, i_new) -│ │ │ estimated row count: 9 (missing stats) -│ │ │ -│ │ └── • scan buffer -│ │ columns: (r, s, i, j, r_new, s_new, i_new, check1) -│ │ label: buffer 1 +│ └── • project +│ │ columns: (r_new, i_new, rownum) │ │ -│ └── • values -│ columns: ("lookup_join_const_col_@17") -│ size: 1 column, 3 rows -│ row 0, expr 0: 'us-east' -│ row 1, expr 0: 'us-west' -│ row 2, expr 0: 'eu-west' +│ └── • project +│ │ columns: (r, i, r_new, i_new, rownum) +│ │ estimated row count: 31 (missing stats) +│ │ +│ └── • lookup join (inner) +│ │ columns: (r_new, i_new, rownum, "lookup_join_const_col_@17", r, i) +│ │ table: uniq_enum@primary +│ │ equality: (lookup_join_const_col_@17, i_new) = (r,i) +│ │ equality cols are key +│ │ pred: r_new != r +│ │ +│ └── • cross join (inner) +│ │ columns: (r_new, i_new, 
rownum, "lookup_join_const_col_@17") +│ │ estimated row count: 28 (missing stats) +│ │ +│ ├── • ordinality +│ │ │ columns: (r_new, i_new, rownum) +│ │ │ estimated row count: 9 (missing stats) +│ │ │ +│ │ └── • project +│ │ │ columns: (r_new, i_new) +│ │ │ estimated row count: 9 (missing stats) +│ │ │ +│ │ └── • scan buffer +│ │ columns: (r, s, i, j, r_new, s_new, i_new, check1) +│ │ label: buffer 1 +│ │ +│ └── • values +│ columns: ("lookup_join_const_col_@17") +│ size: 1 column, 3 rows +│ row 0, expr 0: 'us-east' +│ row 1, expr 0: 'us-west' +│ row 2, expr 0: 'eu-west' │ └── • constraint-check │ @@ -2243,31 +2293,47 @@ vectorized: true │ columns: (r_new, s_new, i_new, j) │ estimated row count: 3 (missing stats) │ - └── • lookup join (semi) - │ columns: (r_new, s_new, i_new, j, "lookup_join_const_col_@27") - │ table: uniq_enum@uniq_enum_r_s_j_key - │ equality: (lookup_join_const_col_@27, s_new, j) = (r,s,j) - │ equality cols are key - │ pred: (r_new != r) OR (i_new != i) + └── • distinct + │ columns: (r_new, s_new, i_new, j, rownum) + │ estimated row count: 0 (missing stats) + │ distinct on: rownum │ - └── • cross join (inner) - │ columns: (r_new, s_new, i_new, j, "lookup_join_const_col_@27") - │ estimated row count: 28 (missing stats) - │ - ├── • project - │ │ columns: (r_new, s_new, i_new, j) - │ │ estimated row count: 9 (missing stats) - │ │ - │ └── • scan buffer - │ columns: (r, s, i, j, r_new, s_new, i_new, check1) - │ label: buffer 1 + └── • project + │ columns: (r_new, s_new, i_new, j, rownum) │ - └── • values - columns: ("lookup_join_const_col_@27") - size: 1 column, 3 rows - row 0, expr 0: 'us-east' - row 1, expr 0: 'us-west' - row 2, expr 0: 'eu-west' + └── • project + │ columns: (r, s, i, j, r_new, s_new, i_new, j, rownum) + │ estimated row count: 0 (missing stats) + │ + └── • lookup join (inner) + │ columns: (r_new, s_new, i_new, j, rownum, "lookup_join_const_col_@27", r, s, i, j) + │ table: uniq_enum@uniq_enum_r_s_j_key + │ equality: 
(lookup_join_const_col_@27, s_new, j) = (r,s,j) + │ equality cols are key + │ pred: (r_new != r) OR (i_new != i) + │ + └── • cross join (inner) + │ columns: (r_new, s_new, i_new, j, rownum, "lookup_join_const_col_@27") + │ estimated row count: 28 (missing stats) + │ + ├── • ordinality + │ │ columns: (r_new, s_new, i_new, j, rownum) + │ │ estimated row count: 9 (missing stats) + │ │ + │ └── • project + │ │ columns: (r_new, s_new, i_new, j) + │ │ estimated row count: 9 (missing stats) + │ │ + │ └── • scan buffer + │ columns: (r, s, i, j, r_new, s_new, i_new, check1) + │ label: buffer 1 + │ + └── • values + columns: ("lookup_join_const_col_@27") + size: 1 column, 3 rows + row 0, expr 0: 'us-east' + row 1, expr 0: 'us-west' + row 2, expr 0: 'eu-west' # None of the updated values have nulls. query T @@ -2522,39 +2588,25 @@ vectorized: true │ columns: (b_new) │ estimated row count: 0 │ - └── • project + └── • lookup join (semi) │ columns: (r, a, b_new, c) │ estimated row count: 0 + │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) + │ lookup condition: (b_new = b) AND (r IN ('us-east', 'us-west', 'eu-west')) + │ pred: (r != r) OR (a != a) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@16", r, a, b_new, c) - │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) - │ equality: (lookup_join_const_col_@16, b_new) = (r,b) - │ pred: (r != r) OR (a != a) + └── • filter + │ columns: (r, a, b_new, c) + │ estimated row count: 1 + │ filter: c IN ('bar', 'baz', 'foo') │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@16", r, a, b_new, c) - │ estimated row count: 3 - │ - ├── • values - │ columns: ("lookup_join_const_col_@16") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (r, a, b_new, c) + │ estimated row count: 1 │ - └── • filter - │ columns: (r, a, b_new, c) - │ estimated row count: 1 - │ filter: c IN ('bar', 'baz', 
'foo') - │ - └── • project - │ columns: (r, a, b_new, c) - │ estimated row count: 1 - │ - └── • scan buffer - columns: (r, a, b, b_new, partial_index_put1, partial_index_put1, c) - label: buffer 1 + └── • scan buffer + columns: (r, a, b, b_new, partial_index_put1, partial_index_put1, c) + label: buffer 1 # By default, we do not require checks on UUID columns set to gen_random_uuid(), # but we do for UUID columns set to other values. @@ -3422,31 +3474,47 @@ vectorized: true │ │ columns: (upsert_r, upsert_i) │ │ estimated row count: 1 (missing stats) │ │ -│ └── • lookup join (semi) -│ │ columns: ("lookup_join_const_col_@20", upsert_r, upsert_i) -│ │ table: uniq_enum@primary -│ │ equality: (lookup_join_const_col_@20, upsert_i) = (r,i) -│ │ equality cols are key -│ │ pred: upsert_r != r +│ └── • distinct +│ │ columns: (upsert_r, upsert_i, rownum) +│ │ estimated row count: 2 (missing stats) +│ │ distinct on: rownum │ │ -│ └── • cross join (inner) -│ │ columns: ("lookup_join_const_col_@20", upsert_r, upsert_i) -│ │ estimated row count: 6 (missing stats) -│ │ -│ ├── • values -│ │ columns: ("lookup_join_const_col_@20") -│ │ size: 1 column, 3 rows -│ │ row 0, expr 0: 'us-east' -│ │ row 1, expr 0: 'us-west' -│ │ row 2, expr 0: 'eu-west' +│ └── • project +│ │ columns: (upsert_r, upsert_i, rownum) │ │ │ └── • project -│ │ columns: (upsert_r, upsert_i) -│ │ estimated row count: 2 (missing stats) +│ │ columns: (r, i, upsert_r, upsert_i, rownum) +│ │ estimated row count: 7 (missing stats) │ │ -│ └── • scan buffer -│ columns: (column1, column2, column3, column4, r, s, i, j, column2, column4, r, check1, upsert_r, upsert_i) -│ label: buffer 1 +│ └── • lookup join (inner) +│ │ columns: ("lookup_join_const_col_@20", upsert_r, upsert_i, rownum, r, i) +│ │ table: uniq_enum@primary +│ │ equality: (lookup_join_const_col_@20, upsert_i) = (r,i) +│ │ equality cols are key +│ │ pred: upsert_r != r +│ │ +│ └── • cross join (inner) +│ │ columns: ("lookup_join_const_col_@20", upsert_r, upsert_i, 
rownum) +│ │ estimated row count: 6 (missing stats) +│ │ +│ ├── • values +│ │ columns: ("lookup_join_const_col_@20") +│ │ size: 1 column, 3 rows +│ │ row 0, expr 0: 'us-east' +│ │ row 1, expr 0: 'us-west' +│ │ row 2, expr 0: 'eu-west' +│ │ +│ └── • ordinality +│ │ columns: (upsert_r, upsert_i, rownum) +│ │ estimated row count: 2 (missing stats) +│ │ +│ └── • project +│ │ columns: (upsert_r, upsert_i) +│ │ estimated row count: 2 (missing stats) +│ │ +│ └── • scan buffer +│ columns: (column1, column2, column3, column4, r, s, i, j, column2, column4, r, check1, upsert_r, upsert_i) +│ label: buffer 1 │ └── • constraint-check │ @@ -3461,31 +3529,47 @@ vectorized: true │ columns: (upsert_r, column2, upsert_i, column4) │ estimated row count: 1 (missing stats) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@30", upsert_r, column2, upsert_i, column4) - │ table: uniq_enum@uniq_enum_r_s_j_key - │ equality: (lookup_join_const_col_@30, column2, column4) = (r,s,j) - │ equality cols are key - │ pred: (upsert_r != r) OR (upsert_i != i) + └── • distinct + │ columns: (upsert_r, column2, upsert_i, column4, rownum) + │ estimated row count: 0 (missing stats) + │ distinct on: rownum │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@30", upsert_r, column2, upsert_i, column4) - │ estimated row count: 6 (missing stats) - │ - ├── • values - │ columns: ("lookup_join_const_col_@30") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (upsert_r, column2, upsert_i, column4, rownum) │ └── • project - │ columns: (upsert_r, column2, upsert_i, column4) - │ estimated row count: 2 (missing stats) + │ columns: (r, s, i, j, upsert_r, column2, upsert_i, column4, rownum) + │ estimated row count: 0 (missing stats) │ - └── • scan buffer - columns: (column1, column2, column3, column4, r, s, i, j, column2, column4, r, check1, upsert_r, upsert_i) - label: buffer 1 + └── • lookup join 
(inner) + │ columns: ("lookup_join_const_col_@30", upsert_r, column2, upsert_i, column4, rownum, r, s, i, j) + │ table: uniq_enum@uniq_enum_r_s_j_key + │ equality: (lookup_join_const_col_@30, column2, column4) = (r,s,j) + │ equality cols are key + │ pred: (upsert_r != r) OR (upsert_i != i) + │ + └── • cross join (inner) + │ columns: ("lookup_join_const_col_@30", upsert_r, column2, upsert_i, column4, rownum) + │ estimated row count: 6 (missing stats) + │ + ├── • values + │ columns: ("lookup_join_const_col_@30") + │ size: 1 column, 3 rows + │ row 0, expr 0: 'us-east' + │ row 1, expr 0: 'us-west' + │ row 2, expr 0: 'eu-west' + │ + └── • ordinality + │ columns: (upsert_r, column2, upsert_i, column4, rownum) + │ estimated row count: 2 (missing stats) + │ + └── • project + │ columns: (upsert_r, column2, upsert_i, column4) + │ estimated row count: 2 (missing stats) + │ + └── • scan buffer + columns: (column1, column2, column3, column4, r, s, i, j, column2, column4, r, check1, upsert_r, upsert_i) + label: buffer 1 # Test that we use the index when available for the ON CONFLICT checks. 
query T @@ -3573,31 +3657,47 @@ vectorized: true │ columns: (upsert_r, upsert_i) │ estimated row count: 1 (missing stats) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@23", upsert_r, upsert_i) - │ table: uniq_enum@primary - │ equality: (lookup_join_const_col_@23, upsert_i) = (r,i) - │ equality cols are key - │ pred: upsert_r != r + └── • distinct + │ columns: (upsert_r, upsert_i, rownum) + │ estimated row count: 2 (missing stats) + │ distinct on: rownum │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@23", upsert_r, upsert_i) - │ estimated row count: 6 (missing stats) - │ - ├── • values - │ columns: ("lookup_join_const_col_@23") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (upsert_r, upsert_i, rownum) │ └── • project - │ columns: (upsert_r, upsert_i) - │ estimated row count: 2 (missing stats) + │ columns: (r, i, upsert_r, upsert_i, rownum) + │ estimated row count: 7 (missing stats) │ - └── • scan buffer - columns: (column1, column2, column3, column4, r, s, i, j, upsert_i, r, check1, upsert_r) - label: buffer 1 + └── • lookup join (inner) + │ columns: ("lookup_join_const_col_@23", upsert_r, upsert_i, rownum, r, i) + │ table: uniq_enum@primary + │ equality: (lookup_join_const_col_@23, upsert_i) = (r,i) + │ equality cols are key + │ pred: upsert_r != r + │ + └── • cross join (inner) + │ columns: ("lookup_join_const_col_@23", upsert_r, upsert_i, rownum) + │ estimated row count: 6 (missing stats) + │ + ├── • values + │ columns: ("lookup_join_const_col_@23") + │ size: 1 column, 3 rows + │ row 0, expr 0: 'us-east' + │ row 1, expr 0: 'us-west' + │ row 2, expr 0: 'eu-west' + │ + └── • ordinality + │ columns: (upsert_r, upsert_i, rownum) + │ estimated row count: 2 (missing stats) + │ + └── • project + │ columns: (upsert_r, upsert_i) + │ estimated row count: 2 (missing stats) + │ + └── • scan buffer + columns: (column1, column2, column3, 
column4, r, s, i, j, upsert_i, r, check1, upsert_r) + label: buffer 1 # None of the upserted values have nulls. query T @@ -4098,39 +4198,25 @@ vectorized: true │ columns: (column3) │ estimated row count: 1 │ - └── • project + └── • lookup join (semi) │ columns: (upsert_r, upsert_a, column3, column4) │ estimated row count: 1 + │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) + │ lookup condition: (column3 = b) AND (r IN ('us-east', 'us-west', 'eu-west')) + │ pred: (upsert_r != r) OR (upsert_a != a) │ - └── • lookup join (semi) - │ columns: ("lookup_join_const_col_@22", upsert_r, upsert_a, column3, column4) - │ table: uniq_partial_enum@uniq_partial_enum_r_b_idx (partial index) - │ equality: (lookup_join_const_col_@22, column3) = (r,b) - │ pred: (upsert_r != r) OR (upsert_a != a) + └── • filter + │ columns: (upsert_r, upsert_a, column3, column4) + │ estimated row count: 2 + │ filter: column4 IN ('bar', 'baz', 'foo') │ - └── • cross join (inner) - │ columns: ("lookup_join_const_col_@22", upsert_r, upsert_a, column3, column4) - │ estimated row count: 6 - │ - ├── • values - │ columns: ("lookup_join_const_col_@22") - │ size: 1 column, 3 rows - │ row 0, expr 0: 'us-east' - │ row 1, expr 0: 'us-west' - │ row 2, expr 0: 'eu-west' + └── • project + │ columns: (upsert_r, upsert_a, column3, column4) + │ estimated row count: 2 │ - └── • filter - │ columns: (upsert_r, upsert_a, column3, column4) - │ estimated row count: 2 - │ filter: column4 IN ('bar', 'baz', 'foo') - │ - └── • project - │ columns: (upsert_r, upsert_a, column3, column4) - │ estimated row count: 2 - │ - └── • scan buffer - columns: (column1, column2, column3, column4, r, a, b, c, column3, column4, r, check1, partial_index_put1, partial_index_del1, upsert_r, upsert_a) - label: buffer 1 + └── • scan buffer + columns: (column1, column2, column3, column4, r, a, b, c, column3, column4, r, check1, partial_index_put1, partial_index_del1, upsert_r, upsert_a) + label: buffer 1 # Test that we use the 
partial index when available for de-duplicating INSERT ON # CONFLICT DO UPDATE rows before inserting. diff --git a/pkg/sql/opt/xform/join_funcs.go b/pkg/sql/opt/xform/join_funcs.go index 24199fec20af..70c206fa0ea3 100644 --- a/pkg/sql/opt/xform/join_funcs.go +++ b/pkg/sql/opt/xform/join_funcs.go @@ -400,12 +400,13 @@ func (c *CustomFuncs) generateLookupJoinsImpl( } if len(foundVals) > 1 { - if joinType == opt.LeftJoinOp || joinType == opt.AntiJoinOp { - // We cannot use the method constructJoinWithConstants to create a cross - // join for left or anti joins, because constructing a cross join with - // foundVals will increase the size of the input. As a result, - // non-matching input rows will show up more than once in the output, - // which is incorrect (see #59615). + if joinType == opt.LeftJoinOp || joinType == opt.SemiJoinOp || joinType == opt.AntiJoinOp { + // We cannot use the method constructJoinWithConstants to + // create a cross join for left, semi, or anti joins, + // because constructing a cross join with foundVals will + // increase the size of the input. As a result, non-matching + // input rows will show up more than once in the output, + // which is incorrect (see #59615 and #78681). shouldBuildMultiSpanLookupJoin = true break } diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 34286cbbfbba..87e65fa62ead 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -2778,8 +2778,8 @@ anti-join (hash) ├── m:1 = a:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] └── n:2 = c:8 [outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ]), fd=(2)==(8), (8)==(2)] -# Regression test for #59615. Ensure that invalid lookup joins are not created -# for left and anti joins. +# Regression test for #59615 and #78681. Ensure that invalid lookup joins are +# not created for left, semi, and anti joins. 
exec-ddl CREATE TABLE t59615 ( x INT NOT NULL CHECK (x in (1, 3)), @@ -2807,6 +2807,26 @@ left-join (lookup t59615 [as=t]) │ └── (2,) └── filters (true) +# Regression test for #78681. +opt expect=GenerateLookupJoins +SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE EXISTS ( + SELECT * FROM t59615 t WHERE u.y = t.y +) +---- +semi-join (lookup t59615 [as=t]) + ├── columns: y:1!null + ├── lookup expression + │ └── filters + │ ├── column1:1 = y:3 [outer=(1,3), constraints=(/1: (/NULL - ]; /3: (/NULL - ]), fd=(1)==(3), (3)==(1)] + │ └── x:2 IN (1, 3) [outer=(2), constraints=(/2: [/1 - /1] [/3 - /3]; tight)] + ├── cardinality: [0 - 2] + ├── values + │ ├── columns: column1:1!null + │ ├── cardinality: [2 - 2] + │ ├── (1,) + │ └── (2,) + └── filters (true) + opt expect=GenerateLookupJoins SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE NOT EXISTS ( SELECT * FROM t59615 t WHERE u.y = t.y