opt: remove uniqueness checks when uniqueness inferred through FDs
This commit removes uniqueness checks for columns that can be
inferred to be unique through functional dependencies. This is
relevant in particular for REGIONAL BY ROW tables with a computed
region column that depends on the primary key. In this case,
uniqueness checks are never needed on the primary key, since
uniqueness is already guaranteed by the primary index.

Fixes #57720

Release justification: This commit is a low-risk, high-benefit
update to new functionality.

Release note (performance improvement): Removed uniqueness checks
on the primary key for REGIONAL BY ROW tables with a computed
region column that is a function of the primary key columns.
Uniqueness checks are not necessary in this case since uniqueness
is already guaranteed by the primary index. Removing these checks
improves the performance of INSERT, UPDATE, and UPSERT statements.
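For illustration, this is the shape of table that benefits, adapted from a
comment added in this commit (the region names and CASE expression are only
for the example):

  CREATE TABLE tab (
    k INT PRIMARY KEY,
    region crdb_internal_region AS (
      CASE WHEN k < 10 THEN 'us-east1' ELSE 'us-west1' END
    ) STORED
  ) LOCALITY REGIONAL BY ROW AS region;

Because the region column is implicitly prepended to every index, the primary
index is effectively on (region, k); since region is computed from k alone,
uniqueness of (region, k) implies uniqueness of k, and no separate check on k
is needed.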
rytaft committed Feb 25, 2021
1 parent 1ab9adb commit 1e795d1
Showing 8 changed files with 538 additions and 11 deletions.
48 changes: 48 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/regional_by_row
@@ -1035,6 +1035,54 @@
pk a b crdb_region_col
statement error cannot drop column crdb_region_col as it is used to store the region in a REGIONAL BY ROW table\nHINT: You must change the table locality before dropping this table
ALTER TABLE regional_by_row_table_as DROP COLUMN crdb_region_col

# We do not need uniqueness checks on pk since uniqueness can be inferred
# through the functional dependency between pk and the computed region column.
query T
EXPLAIN INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (1, 1, 1)
----
distribution: local
vectorized: true
·
• root
├── • insert
│ │ into: regional_by_row_table_as(pk, a, b, crdb_region_col)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • values
│ size: 5 columns, 1 row
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_as@regional_by_row_table_as_b_key
│ equality: (lookup_join_const_col_@21, column3) = (crdb_region_col,b)
│ equality cols are key
│ pred: (column1 != pk) OR (column10 != crdb_region_col)
└── • cross join
├── • values
│ size: 1 column, 3 rows
└── • scan buffer
label: buffer 1

# TODO(mgartner): Update this error message to remove crdb_region (see #59504).
statement error pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(crdb_region_col,pk\)=\('us-east-1',1\) already exists\.
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (1, 1, 1)

statement ok
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (30, 1, 1)

statement error pq: duplicate key value violates unique constraint "regional_by_row_table_as_b_key"\nDETAIL: Key \(b\)=\(1\) already exists\.
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (2, 1, 1)


# Tests for altering the survivability of a REGIONAL BY ROW table.
statement ok
CREATE DATABASE alter_survive_db PRIMARY REGION "us-east-1" REGIONS "ca-central-1", "ap-southeast-2" SURVIVE REGION FAILURE
78 changes: 78 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/unique
Expand Up @@ -86,6 +86,23 @@ CREATE TABLE uniq_enum (
UNIQUE WITHOUT INDEX (s, j)
)

statement ok
CREATE TABLE uniq_computed_pk (
i INT,
s STRING,
d DECIMAL,
c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
c_s STRING AS (s) VIRTUAL,
c_d DECIMAL AS (d) STORED,
c_d_expr STRING AS (d::string) STORED,
PRIMARY KEY (c_i_expr, i),
UNIQUE (c_s, s),
UNIQUE (c_d_expr, d),
UNIQUE WITHOUT INDEX (i),
UNIQUE WITHOUT INDEX (s),
UNIQUE WITHOUT INDEX (d)
)

statement ok
CREATE TABLE other (k INT, v INT, w INT NOT NULL, x INT, y INT)

@@ -322,6 +339,28 @@
a b
NULL 5
NULL 5

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement ok
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0), (2, 'b', 2.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',1\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'c', 3.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('b','b'\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (3, 'b', 3.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(1\.00\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (3, 'c', 1.00)

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with UPDATE --
subtest Update
@@ -475,6 +514,25 @@
NULL 5
NULL 5
NULL 10

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',1\) already exists\.
UPDATE uniq_computed_pk SET i = 1 WHERE i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('a','a'\) already exists\.
UPDATE uniq_computed_pk SET s = 'a' WHERE i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(1\.00\) already exists\.
UPDATE uniq_computed_pk SET d = 1.00 WHERE i = 2

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with UPSERT --
subtest Upsert
@@ -670,6 +728,26 @@
SELECT * FROM uniq_partial_index_and_constraint
i
2

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',2\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0) ON CONFLICT (s) DO UPDATE SET i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('b','b'\) already exists\.
UPSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'b', 1.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(2\.00\) already exists\.
UPSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 2.00)

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with DELETE --
subtest Delete

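An aside on the unique_d failures above (a sketch, not part of the test file):
DECIMAL comparison ignores trailing zeros, so 1.00 collides with the existing
1.0 even though the two values print differently:

  SELECT 1.0::DECIMAL = 1.00::DECIMAL;  -- true: equal values, distinct representations

This "equal but distinguishable" behavior is what the execbuilder tests below
call composite sensitivity.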
64 changes: 62 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/unique
@@ -110,8 +110,6 @@
CREATE TABLE uniq_partial_hidden_pk (
statement ok
CREATE TYPE region AS ENUM ('us-east', 'us-west', 'eu-west')

# TODO(rytaft): When more of the multi-region syntax is supported,
# add it here.
statement ok
CREATE TABLE uniq_enum (
r region DEFAULT CASE (random()*3)::int WHEN 0 THEN 'us-east' WHEN 1 THEN 'us-west' ELSE 'eu-west' END,
@@ -202,6 +200,29 @@
ALTER TABLE uniq_partial_enum INJECT STATISTICS '[
}
]'

statement ok
CREATE TABLE uniq_computed_pk (
i INT,
s STRING,
d DECIMAL,
c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
c_s STRING AS (s) VIRTUAL,
c_d DECIMAL AS (d) STORED,
c_d_expr STRING AS (d::string) STORED,
PRIMARY KEY (c_i_expr, i),
UNIQUE (c_s, s),
UNIQUE (c_d_expr, d),
UNIQUE WITHOUT INDEX (i),
UNIQUE WITHOUT INDEX (s),
UNIQUE WITHOUT INDEX (d),
FAMILY (i),
FAMILY (s),
FAMILY (d),
FAMILY (c_i_expr),
FAMILY (c_d),
FAMILY (c_d_expr)
)

statement ok
CREATE TABLE other (k INT, v INT, w INT NOT NULL, x INT, y INT)

@@ -1641,6 +1662,45 @@
vectorized: true
columns: (column1, column2, column3, column4, check1, partial_index_put1)
label: buffer 1

# We can eliminate the uniqueness checks for i and s due to functional
# dependencies. We cannot eliminate the check for d, since the functional
# dependency from d to c_d_expr cannot be inferred: the expression d::string
# is composite-sensitive.
query T
EXPLAIN INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0), (2, 'b', 2.0)
----
distribution: local
vectorized: true
·
• root
├── • insert
│ │ into: uniq_computed_pk(i, s, d, c_i_expr, c_s, c_d, c_d_expr)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • render
│ │
│ └── • values
│ size: 3 columns, 2 rows
└── • constraint-check
└── • error if rows
└── • hash join (right semi)
│ equality: (d) = (column3)
│ pred: (column1 != i) OR (column13 != c_i_expr)
├── • scan
│ missing stats
│ table: uniq_computed_pk@uniq_computed_pk_c_d_expr_d_key
│ spans: FULL SCAN
└── • scan buffer
label: buffer 1


# -- Tests with UPDATE --
subtest Update

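A quick demonstration of that composite sensitivity (a sketch, independent of
the test file): casting to STRING distinguishes values that compare equal as
DECIMALs, so d does not functionally determine d::string:

  SELECT 1.0::DECIMAL::STRING, 1.00::DECIMAL::STRING;  -- '1.0', '1.00'

Two rows could therefore agree on d yet disagree on c_d_expr, so the unique
index on (c_d_expr, d) cannot stand in for the uniqueness check on d alone.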
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/testdata/logprops/scan
@@ -399,6 +399,9 @@
index-join c
└── interesting orderings: (+1) (+3,+1)

# Test FDs for computed columns.
# We add equivalencies s=c_s and d=c_d, a strict dependency i->c_i_expr, and
# no dependency d->c_d_expr since the expression d::string is composite-
# sensitive.
exec-ddl
CREATE TABLE computed (
i INT,
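To spell out the dependencies named in that comment, here is a sketch using
the same column definitions as uniq_computed_pk from the logic tests above
(the table name is hypothetical):

  CREATE TABLE computed_fds (
    i INT,
    s STRING,
    d DECIMAL,
    c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
    c_s STRING AS (s) VIRTUAL,
    c_d DECIMAL AS (d) STORED,
    c_d_expr STRING AS (d::string) STORED,
    PRIMARY KEY (c_i_expr, i)
  );
  -- FDs derived for a scan of this table:
  --   s == c_s, d == c_d   (equivalences: each computed column is a bare column reference)
  --   i --> c_i_expr       (strict dependency: a scalar expression of i alone)
  --   no d --> c_d_expr    (d::string is composite-sensitive, e.g. 1.0 vs 1.00)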
59 changes: 50 additions & 9 deletions pkg/sql/opt/optbuilder/mutation_builder_unique.go
@@ -153,6 +153,11 @@
type uniqueCheckHelper struct {
// primaryKeyOrdinals includes the ordinals from any primary key columns
// that are not included in uniqueOrdinals.
primaryKeyOrdinals util.FastIntSet

// The scope and column ordinals of the scan that will serve as the right
// side of the semi join for the uniqueness checks.
scanScope *scope
scanOrdinals []int
}

// init initializes the helper with a unique constraint.
@@ -179,7 +184,7 @@
func (h *uniqueCheckHelper) init(mb *mutationBuilder, uniqueOrdinal int) bool {
// with columns that are a subset of the unique constraint columns.
// Similarly, we don't need a check for a partial unique constraint if there
// exists a non-partial unique constraint with columns that are a subset of
// the partial unique constrain columns.
// the partial unique constraint columns.
primaryOrds := getIndexLaxKeyOrdinals(mb.tab.Index(cat.PrimaryIndex))
primaryOrds.DifferenceWith(uniqueOrds)
if primaryOrds.Empty() {
@@ -203,7 +208,44 @@
func (h *uniqueCheckHelper) init(mb *mutationBuilder, uniqueOrdinal int) bool {

// If at least one unique column is getting a NULL value, unique check not
// needed.
return numNullCols == 0
if numNullCols != 0 {
return false
}

// Build the scan that will serve as the right side of the semi join in the
// uniqueness check. We need to build the scan now so that we can use its
// FDs below.
h.scanScope, h.scanOrdinals = h.buildTableScan()

// Check that the columns in the unique constraint aren't already known to
// form a lax key. This can happen if there is a unique index on a superset of
// these columns, where all other columns are computed columns that depend
// only on our columns. This is especially important for multi-region tables
// when the region column is computed.
//
// For example:
//
// CREATE TABLE tab (
// k INT PRIMARY KEY,
// region crdb_internal_region AS (
// CASE WHEN k < 10 THEN 'us-east1' ELSE 'us-west1' END
// ) STORED
// ) LOCALITY REGIONAL BY ROW AS region
//
// Because this is a REGIONAL BY ROW table, the region column is implicitly
// added to the front of every index, including the primary index. As a
// result, we would normally need to add a uniqueness check to all mutations
// to ensure that the primary key column (k in this case) remains unique.
// However, because the region column is computed and depends only on k, the
// presence of the unique index on (region, k) (i.e., the primary index) is
// sufficient to guarantee the uniqueness of k.
var uniqueCols opt.ColSet
h.uniqueOrdinals.ForEach(func(ord int) {
colID := h.scanScope.cols[ord].id
uniqueCols.Add(colID)
})
fds := &h.scanScope.expr.Relational().FuncDeps
return !fds.ColsAreLaxKey(uniqueCols)
}

// buildInsertionCheck creates a unique check for rows which are added to a
@@ -214,10 +256,9 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {

// Build a self semi-join, with the new values on the left and the
// existing values on the right.
scanScope, ordinals := h.buildTableScan()

withScanScope, _ := h.mb.buildCheckInputScan(
checkInputScanNewVals, ordinals,
checkInputScanNewVals, h.scanOrdinals,
)

// Build the join filters:
@@ -238,7 +279,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(
f.ConstructEq(
f.ConstructVariable(withScanScope.cols[i].id),
f.ConstructVariable(scanScope.cols[i].id),
f.ConstructVariable(h.scanScope.cols[i].id),
),
))
}
@@ -255,8 +296,8 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
withScanPred := h.mb.b.buildScalar(typedPred, withScanScope, nil, nil, nil)
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(withScanPred))

typedPred = scanScope.resolveAndRequireType(pred, types.Bool)
scanPred := h.mb.b.buildScalar(typedPred, scanScope, nil, nil, nil)
typedPred = h.scanScope.resolveAndRequireType(pred, types.Bool)
scanPred := h.mb.b.buildScalar(typedPred, h.scanScope, nil, nil, nil)
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(scanPred))
}

@@ -268,7 +309,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
for i, ok := h.primaryKeyOrdinals.Next(0); ok; i, ok = h.primaryKeyOrdinals.Next(i + 1) {
pkFilterLocal := f.ConstructNe(
f.ConstructVariable(withScanScope.cols[i].id),
f.ConstructVariable(scanScope.cols[i].id),
f.ConstructVariable(h.scanScope.cols[i].id),
)
if pkFilter == nil {
pkFilter = pkFilterLocal
@@ -278,7 +319,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
}
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(pkFilter))

semiJoin := f.ConstructSemiJoin(withScanScope.expr, scanScope.expr, semiJoinFilters, memo.EmptyJoinPrivate)
semiJoin := f.ConstructSemiJoin(withScanScope.expr, h.scanScope.expr, semiJoinFilters, memo.EmptyJoinPrivate)

// Collect the key columns that will be shown in the error message if there
// is a duplicate key violation resulting from this uniqueness check.