Skip to content

Commit

Permalink
opt: do not cross-join input of semi-join
Browse files Browse the repository at this point in the history
This commit fixes a logical correctness bug caused when
`GenerateLookupJoins` cross-joins the input of a semi-join with a set of
constant values to constrain the prefix columns of the lookup index. The
cross-join is an invalid transformation because it increases the size of
the join's input and can increase the size of the join's output.

We already avoid these cross-joins for left and anti-joins (see #59646).
When addressing those cases, the semi-join case was incorrectly assumed
to be safe.

Fixes #78681

Release note (bug fix): A bug has been fixed which caused the optimizer
to generate invalid query plans which could result in incorrect query
results. The bug, which has been present since version 21.1.0, can
appear if all of the following conditions are true: 1) the query
contains a semi-join, such as queries in the form:
`SELECT * FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t1.a = t2.a);`,
2) the inner table has an index containing the equality column, like
`t2.a` in the example query, 3) the index contains one or more
columns that prefix the equality column, and 4) the prefix columns are
`NOT NULL` and are constrained to a set of constant values via a `CHECK`
constraint or an `IN` condition in the filter.
  • Loading branch information
mgartner committed Mar 29, 2022
1 parent c1e089d commit 1d7811d
Show file tree
Hide file tree
Showing 5 changed files with 504 additions and 402 deletions.
203 changes: 91 additions & 112 deletions pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_query_behavior
Original file line number Diff line number Diff line change
Expand Up @@ -1886,39 +1886,45 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table_virt (pk, a, b) VALUES
│ │
│ └── • error if rows
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt@regional_by_row_table_virt_v_key
│ │ equality: (lookup_join_const_col_@34, v_comp) = (crdb_region,v)
│ │ equality cols are key
│ │ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
│ └── • limit
│ │ count: 1
│ │
│ └── • cross join
│ │ estimated row count: 3
│ │
├── • values
│ │ size: 1 column, 3 rows
│ └── • lookup join
│ │ table: regional_by_row_table_virt@regional_by_row_table_virt_v_key
│ │ equality: (lookup_join_const_col_@34, v_comp) = (crdb_region,v)
│ equality cols are key
│ │ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • cross join
│ │ estimated row count: 3
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ │
│ └── • scan buffer
│ label: buffer 1
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_virt@regional_by_row_table_virt_expr_key
│ equality: (lookup_join_const_col_@48, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
│ equality cols are key
│ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
└── • limit
│ count: 1
└── • cross join
estimated row count: 3
├── • values
size: 1 column, 3 rows
└── • lookup join
table: regional_by_row_table_virt@regional_by_row_table_virt_expr_key
equality: (lookup_join_const_col_@48, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
│ equality cols are key
pred: (column1 != pk) OR (crdb_region_default != crdb_region)
└── • scan buffer
label: buffer 1
└── • cross join
│ estimated row count: 3
├── • values
│ size: 1 column, 3 rows
└── • scan buffer
label: buffer 1

query T
SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table_virt (pk, a, b) VALUES (1, 1, 1)] OFFSET 2
Expand Down Expand Up @@ -1961,37 +1967,43 @@ SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table_virt (pk, a, b) VALUES
│ │
│ └── • error if rows
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt@regional_by_row_table_virt_v_key
│ │ equality: (lookup_join_const_col_@30, v_comp) = (crdb_region,v)
│ │ equality cols are key
│ │ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
│ └── • limit
│ │ count: 1
│ │
│ └── • cross join
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ └── • lookup join
│ │ table: regional_by_row_table_virt@regional_by_row_table_virt_v_key
│ │ equality: (lookup_join_const_col_@30, v_comp) = (crdb_region,v)
│ │ equality cols are key
│ │ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • cross join
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ │
│ └── • scan buffer
│ label: buffer 1
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_virt@regional_by_row_table_virt_expr_key
│ equality: (lookup_join_const_col_@44, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
│ equality cols are key
│ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
└── • limit
│ count: 1
└── • cross join
├── • values
│ size: 1 column, 3 rows
└── • lookup join
│ table: regional_by_row_table_virt@regional_by_row_table_virt_expr_key
│ equality: (lookup_join_const_col_@44, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
│ equality cols are key
│ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
└── • scan buffer
label: buffer 1
└── • cross join
├── • values
│ size: 1 column, 3 rows
└── • scan buffer
label: buffer 1

statement ok
INSERT INTO regional_by_row_table_virt (pk, a, b) VALUES (1, 1, 1)
Expand Down Expand Up @@ -2054,65 +2066,47 @@ SELECT * FROM [EXPLAIN INSERT INTO regional_by_row_table_virt_partial (pk, a, b)
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt_partial@v_a_gt_0 (partial index)
│ │ equality: (lookup_join_const_col_@36, v_comp) = (crdb_region,v)
│ │ lookup condition: (v_comp = v) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ │ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
│ │
│ └── • cross join
│ │ estimated row count: 3
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ └── • filter
│ │ estimated row count: 1
│ │ filter: column2 > 0
│ │
│ └── • filter
│ │ estimated row count: 1
│ │ filter: column2 > 0
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • scan buffer
│ label: buffer 1
├── • constraint-check
│ │
│ └── • error if rows
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt_partial@v_v_gt_0 (partial index)
│ │ equality: (lookup_join_const_col_@50, v_comp) = (crdb_region,v)
│ │ lookup condition: (v_comp = v) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ │ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
│ │
│ └── • cross join
│ │ estimated row count: 3
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ └── • filter
│ │ estimated row count: 1
│ │ filter: v_comp > 0
│ │
│ └── • filter
│ │ estimated row count: 1
│ │ filter: v_comp > 0
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • scan buffer
│ label: buffer 1
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_virt_partial@a_plus_10_v_gt_0 (partial index)
equality: (lookup_join_const_col_@64, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
lookup condition: (crdb_internal_idx_expr_comp = crdb_internal_idx_expr) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ pred: (column1 != pk) OR (crdb_region_default != crdb_region)
└── • cross join
│ estimated row count: 3
├── • values
│ size: 1 column, 3 rows
└── • filter
│ estimated row count: 1
│ filter: v_comp > 0
└── • filter
│ estimated row count: 1
│ filter: v_comp > 0
└── • scan buffer
label: buffer 1
└── • scan buffer
label: buffer 1

query T
SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table_virt_partial (pk, a, b) VALUES (1, 1, 1)] OFFSET 2
Expand Down Expand Up @@ -2157,59 +2151,44 @@ SELECT * FROM [EXPLAIN UPSERT INTO regional_by_row_table_virt_partial (pk, a, b)
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt_partial@v_a_gt_0 (partial index)
│ │ equality: (lookup_join_const_col_@34, v_comp) = (crdb_region,v)
│ │ lookup condition: (v_comp = v) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ │ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
│ │
│ └── • cross join
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ └── • filter
│ │ filter: column2 > 0
│ │
│ └── • filter
│ │ filter: column2 > 0
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • scan buffer
│ label: buffer 1
├── • constraint-check
│ │
│ └── • error if rows
│ │
│ └── • lookup join (semi)
│ │ table: regional_by_row_table_virt_partial@v_v_gt_0 (partial index)
│ │ equality: (lookup_join_const_col_@48, v_comp) = (crdb_region,v)
│ │ lookup condition: (v_comp = v) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ │ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
│ │
│ └── • cross join
│ │
│ ├── • values
│ │ size: 1 column, 3 rows
│ └── • filter
│ │ filter: v_comp > 0
│ │
│ └── • filter
│ │ filter: v_comp > 0
│ │
│ └── • scan buffer
│ label: buffer 1
│ └── • scan buffer
│ label: buffer 1
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_virt_partial@a_plus_10_v_gt_0 (partial index)
equality: (lookup_join_const_col_@62, crdb_internal_idx_expr_comp) = (crdb_region,crdb_internal_idx_expr)
lookup condition: (crdb_internal_idx_expr_comp = crdb_internal_idx_expr) AND (crdb_region IN ('ap-southeast-2', 'ca-central-1', 'us-east-1'))
│ pred: (upsert_pk != pk) OR (upsert_crdb_region != crdb_region)
└── • cross join
├── • values
│ size: 1 column, 3 rows
└── • filter
│ filter: v_comp > 0
└── • filter
│ filter: v_comp > 0
└── • scan buffer
label: buffer 1
└── • scan buffer
label: buffer 1

statement ok
INSERT INTO regional_by_row_table_virt_partial (pk, a, b) VALUES (1, 1, 1)
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/lookup_join
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,36 @@ SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE NOT EXISTS (
1
2

# Regression test for #78681. Ensure that invalid lookup joins are not created
# for semi joins.
statement ok
CREATE TABLE t78681 (
x INT NOT NULL CHECK (x in (1, 3)),
y INT NOT NULL,
PRIMARY KEY (x, y)
)

# Insert stats so that a lookup semi-join is selected.
statement ok
ALTER TABLE t78681 INJECT STATISTICS '[
{
"columns": ["x"],
"created_at": "2018-05-01 1:00:00.00000+00:00",
"row_count": 10000000,
"distinct_count": 2
}
]'

statement ok
INSERT INTO t78681 VALUES (1, 1), (3, 1)

query I rowsort
SELECT * FROM (VALUES (1), (2)) AS u(y) WHERE EXISTS (
SELECT * FROM t78681 t WHERE u.y = t.y
)
----
1

statement ok
CREATE TABLE lookup_expr (
r STRING NOT NULL CHECK (r IN ('east', 'west')),
Expand Down
Loading

0 comments on commit 1d7811d

Please sign in to comment.