Skip to content

Commit

Permalink
Merge #60680
Browse files Browse the repository at this point in the history
60680: opt: support INSERT ON CONFLICT DO NOTHING with partial unique constraints r=mgartner a=mgartner

#### opt: move INSERT DO NOTHING arbiter code to mutation_builder_arbiter.go

Arbiter-related code in `pkg/sql/opt/optbuilder/insert.go` has grown
unruly due to the added complexity of partial indexes and unique
constraints. This commit moves some arbiter-related functions to a new
file to accommodate the growth. It also breaks some anonymous closures
into independent functions, for clarity.

Release note: None

#### opt: pass column ordinals directly to arbiter building functions

This commit makes the arguments of `buildAntiJoinForDoNothingArbiter` and
`buildDistinctOnForDoNothingArbiter` more intuitive. Column ordinals
are now passed directly to these functions, rather than a column count
and ordinal-returning callback.

Release note: None

#### opt: create arbiterPredicateHelper for picking partial index arbiters

This commit adds a new helper struct that can determine if a partial
index can be used as an arbiter based on the arbiter predicate of an
`INSERT ON CONFLICT` statement. This will also be a useful utility to
determine if partial unique constraints can be used as arbiters.

Release note: None

#### opt: support INSERT ON CONFLICT DO NOTHING with partial unique constraints

To support INSERT ON CONFLICT DO NOTHING statements on tables with
partial UNIQUE WITHOUT INDEX constraints, partial constraints are now
selected as arbiters. These arbiters are used to filter out insert rows
that would conflict with existing rows in the table.

Informs #59195

There is no release note because these constraints are gated behind the
experimental_enable_unique_without_index_constraints session variable.

Release note: None


Co-authored-by: Marcus Gartner <[email protected]>
  • Loading branch information
craig[bot] and mgartner committed Feb 19, 2021
2 parents cb0d14a + 71391b6 commit 58216a6
Show file tree
Hide file tree
Showing 11 changed files with 1,322 additions and 428 deletions.
38 changes: 38 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/unique
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ INSERT INTO uniq SELECT k, v, w, x, y FROM other
statement ok
INSERT INTO uniq VALUES (100, 10, 1), (200, 20, 2), (400, 40, 4) ON CONFLICT (w) DO NOTHING

# On conflict do nothing with constant input, conflict on UNIQUE WITHOUT INDEX
# column, conflicting insert rows.
# Only row (500, 50, 50) is inserted.
statement ok
INSERT INTO uniq VALUES (500, 50, 50), (600, 50, 50) ON CONFLICT (w) DO NOTHING

# On conflict do nothing with constant input, no conflict columns.
# The only row that is successfully inserted here is (20, 20, 20, 20, 20).
statement ok
Expand All @@ -163,6 +169,7 @@ k v w x y
7 7 NULL 2 NULL
20 20 20 20 20
400 40 4 NULL 5
500 50 50 NULL 5


# Insert into a table in which the primary key overlaps some of the unique
Expand Down Expand Up @@ -281,6 +288,30 @@ INSERT INTO uniq_partial VALUES (NULL, 5), (5, 5), (NULL, 5)
statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_a"\nDETAIL: Key \(a\)=\(1\) already exists\.
INSERT INTO uniq_partial SELECT w, x FROM other

statement error there is no unique or exclusion constraint matching the ON CONFLICT specification
INSERT INTO uniq_partial VALUES (1, 6), (6, 6) ON CONFLICT (a) DO NOTHING

# On conflict do nothing with constant input, conflict on UNIQUE WITHOUT INDEX
# column. Only the non-conflicting row (6, 6) is inserted.
statement ok
INSERT INTO uniq_partial VALUES (1, 6), (6, 6) ON CONFLICT (a) WHERE b > 0 DO NOTHING

# On conflict do nothing with constant input, conflict on UNIQUE WITHOUT INDEX
# column, conflicting insert rows.
# Only rows (7, 7) and (7, -7) are inserted.
statement ok
INSERT INTO uniq_partial VALUES (7, 7), (7, 8), (7, -7) ON CONFLICT (a) WHERE b > 0 DO NOTHING

# On conflict do nothing with constant input, no conflict columns.
# Only rows (9, 9) and (9, -9) are inserted.
statement ok
INSERT INTO uniq_partial VALUES (1, 9), (9, 9), (9, 10), (9, -9) ON CONFLICT DO NOTHING

# On conflict do nothing with non-constant input.
# The (1, 10) row is not inserted because of a conflict with (1, 1).
statement ok
INSERT INTO uniq_partial SELECT w, k FROM other ON CONFLICT DO NOTHING

query II colnames,rowsort
SELECT * FROM uniq_partial
----
Expand All @@ -290,6 +321,11 @@ a b
1 -3
2 2
5 5
6 6
7 7
7 -7
9 9
9 -9
NULL 5
NULL 5

Expand Down Expand Up @@ -335,6 +371,7 @@ k v w x y
11 11 10 NULL 2
20 20 20 20 20
400 40 4 NULL 5
500 50 50 NULL 5


# Update a table with multiple primary key columns.
Expand Down Expand Up @@ -489,6 +526,7 @@ k v w x y
20 20 20 20 20
100 100 1 NULL 5
400 40 4 NULL 5
500 50 50 NULL 5


# Upsert into a table in which the primary key overlaps some of the unique
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/cat/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ type UniqueConstraint interface {
// constraint.
ColumnOrdinal(tab Table, i int) int

// Predicate returns the partial index predicate expression and true if the
// Predicate returns the partial predicate expression and true if the
// constraint is a partial unique constraint. If it is not, the empty string
// and false are returned.
Predicate() (string, bool)
Expand Down
287 changes: 286 additions & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/unique
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,8 @@ vectorized: true
columns: (column9, column1, column2, column10, check1)
label: buffer 1

# Test that we use the index when available for the ON CONFLICT checks.
# Test that we use the index when available for de-duplicating INSERT ON
# CONFLICT DO NOTHING rows before inserting.
query T
EXPLAIN (VERBOSE) INSERT INTO uniq_enum VALUES ('us-west', 'foo', 1, 1), ('us-east', 'bar', 2, 2)
ON CONFLICT DO NOTHING
Expand Down Expand Up @@ -1090,6 +1091,180 @@ vectorized: true
└── • scan buffer
label: buffer 1

# Use all the unique indexes and constraints as arbiters for DO NOTHING with no
# conflict columns.
# TODO(mgartner): we should be able to remove the unique checks in this case
# (see #59119).
query T
EXPLAIN (VERBOSE) INSERT INTO uniq_partial VALUES (1, 2, 3) ON CONFLICT DO NOTHING
----
distribution: local
vectorized: true
·
• root
│ columns: ()
├── • insert
│ │ columns: ()
│ │ estimated row count: 0 (missing stats)
│ │ into: uniq_partial(k, a, b)
│ │ arbiter indexes: primary
│ │ arbiter constraints: unique_a, unique_b
│ │
│ └── • buffer
│ │ columns: (column1, column2, column3)
│ │ label: buffer 1
│ │
│ └── • project
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │
│ └── • distinct
│ │ columns: (arbiter_unique_b_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ distinct on: arbiter_unique_b_distinct
│ │ nulls are distinct
│ │
│ └── • render
│ │ columns: (arbiter_unique_b_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ render arbiter_unique_b_distinct: (column3 > 0) OR CAST(NULL AS BOOL)
│ │ render column1: column1
│ │ render column2: column2
│ │ render column3: column3
│ │
│ └── • distinct
│ │ columns: (arbiter_unique_a_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ distinct on: arbiter_unique_a_distinct
│ │ nulls are distinct
│ │
│ └── • render
│ │ columns: (arbiter_unique_a_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ render arbiter_unique_a_distinct: (column3 > 0) OR CAST(NULL AS BOOL)
│ │ render column1: column1
│ │ render column2: column2
│ │ render column3: column3
│ │
│ └── • hash join (right anti)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ equality: (b) = (column3)
│ │ right cols are key
│ │
│ ├── • filter
│ │ │ columns: (b)
│ │ │ estimated row count: 333 (missing stats)
│ │ │ filter: b > 0
│ │ │
│ │ └── • scan
│ │ columns: (b)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_partial@primary
│ │ spans: FULL SCAN
│ │
│ └── • hash join (right anti)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ equality: (a) = (column2)
│ │ right cols are key
│ │ pred: column3 > 0
│ │
│ ├── • filter
│ │ │ columns: (a, b)
│ │ │ estimated row count: 333 (missing stats)
│ │ │ filter: b > 0
│ │ │
│ │ └── • scan
│ │ columns: (a, b)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_partial@primary
│ │ spans: FULL SCAN
│ │
│ └── • cross join (anti)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │
│ ├── • values
│ │ columns: (column1, column2, column3)
│ │ size: 3 columns, 1 row
│ │ row 0, expr 0: 1
│ │ row 0, expr 1: 2
│ │ row 0, expr 2: 3
│ │
│ └── • scan
│ columns: (k)
│ estimated row count: 1 (missing stats)
│ table: uniq_partial@primary
│ spans: /1/0-/1/1
├── • constraint-check
│ │
│ └── • error if rows
│ │ columns: ()
│ │
│ └── • hash join (right semi)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ equality: (a) = (column2)
│ │ right cols are key
│ │ pred: column1 != k
│ │
│ ├── • filter
│ │ │ columns: (k, a, b)
│ │ │ estimated row count: 333 (missing stats)
│ │ │ filter: b > 0
│ │ │
│ │ └── • scan
│ │ columns: (k, a, b)
│ │ estimated row count: 1,000 (missing stats)
│ │ table: uniq_partial@primary
│ │ spans: FULL SCAN
│ │
│ └── • filter
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ filter: column3 > 0
│ │
│ └── • scan buffer
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ label: buffer 1
└── • constraint-check
└── • error if rows
│ columns: ()
└── • hash join (right semi)
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ equality: (b) = (column3)
│ right cols are key
│ pred: column1 != k
├── • filter
│ │ columns: (k, b)
│ │ estimated row count: 333 (missing stats)
│ │ filter: b > 0
│ │
│ └── • scan
│ columns: (k, b)
│ estimated row count: 1,000 (missing stats)
│ table: uniq_partial@primary
│ spans: FULL SCAN
└── • filter
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ filter: column3 > 0
└── • scan buffer
columns: (column1, column2, column3)
estimated row count: 0 (missing stats)
label: buffer 1

# Insert with non-constant input.
query T
EXPLAIN INSERT INTO uniq_partial SELECT k, v, w FROM other
Expand Down Expand Up @@ -1290,6 +1465,116 @@ vectorized: true
columns: (column1, column2, column3, check1, partial_index_put1)
label: buffer 1

# Test that we use the partial index when available for de-duplicating INSERT ON
# CONFLICT DO NOTHING rows before inserting.
query T
EXPLAIN (VERBOSE) INSERT INTO uniq_partial_enum VALUES ('us-west', 1, 'foo'), ('us-east', 2, 'bar')
ON CONFLICT DO NOTHING
----
distribution: local
vectorized: true
·
• root
│ columns: ()
├── • insert
│ │ columns: ()
│ │ estimated row count: 0 (missing stats)
│ │ into: uniq_partial_enum(r, i, s)
│ │ arbiter indexes: primary
│ │ arbiter constraints: unique_i
│ │
│ └── • buffer
│ │ columns: (column1, column2, column3, check1, partial_index_put1)
│ │ label: buffer 1
│ │
│ └── • render
│ │ columns: (column1, column2, column3, check1, partial_index_put1)
│ │ estimated row count: 0 (missing stats)
│ │ render partial_index_put1: column3 IN ('bar', 'baz', 'foo')
│ │ render check1: column1 IN ('us-east', 'us-west', 'eu-west')
│ │ render column1: column1
│ │ render column2: column2
│ │ render column3: column3
│ │
│ └── • distinct
│ │ columns: (arbiter_unique_i_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ distinct on: arbiter_unique_i_distinct, column2
│ │ nulls are distinct
│ │
│ └── • render
│ │ columns: (arbiter_unique_i_distinct, column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ render arbiter_unique_i_distinct: (column3 IN ('bar', 'baz', 'foo')) OR CAST(NULL AS BOOL)
│ │ render column1: column1
│ │ render column2: column2
│ │ render column3: column3
│ │
│ └── • lookup join (anti)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_partial_enum@uniq_partial_enum_r_i_idx (partial index)
│ │ lookup condition: (column2 = i) AND (r IN ('us-east', 'us-west', 'eu-west'))
│ │ pred: column3 IN ('bar', 'baz', 'foo')
│ │
│ └── • lookup join (anti)
│ │ columns: (column1, column2, column3)
│ │ estimated row count: 0 (missing stats)
│ │ table: uniq_partial_enum@primary
│ │ equality: (column1, column2) = (r,i)
│ │ equality cols are key
│ │
│ └── • values
│ columns: (column1, column2, column3)
│ size: 3 columns, 2 rows
│ row 0, expr 0: 'us-west'
│ row 0, expr 1: 1
│ row 0, expr 2: 'foo'
│ row 1, expr 0: 'us-east'
│ row 1, expr 1: 2
│ row 1, expr 2: 'bar'
└── • constraint-check
└── • error if rows
│ columns: ()
└── • project
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
└── • lookup join (semi)
│ columns: ("lookup_join_const_col_@22", column1, column2, column3)
│ table: uniq_partial_enum@uniq_partial_enum_r_i_idx (partial index)
│ equality: (lookup_join_const_col_@22, column2) = (r,i)
│ equality cols are key
│ pred: column1 != r
└── • cross join (inner)
│ columns: ("lookup_join_const_col_@22", column1, column2, column3)
│ estimated row count: 0 (missing stats)
├── • values
│ columns: ("lookup_join_const_col_@22")
│ size: 1 column, 3 rows
│ row 0, expr 0: 'us-east'
│ row 1, expr 0: 'us-west'
│ row 2, expr 0: 'eu-west'
└── • filter
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
│ filter: column3 IN ('bar', 'baz', 'foo')
└── • project
│ columns: (column1, column2, column3)
│ estimated row count: 0 (missing stats)
└── • scan buffer
columns: (column1, column2, column3, check1, partial_index_put1)
label: buffer 1

# -- Tests with UPDATE --
subtest Update

Expand Down
Loading

0 comments on commit 58216a6

Please sign in to comment.