opt: remove uniqueness checks when uniqueness inferred through FDs
This commit removes uniqueness checks for columns that can be
inferred to be unique through functional dependencies. This is
relevant in particular for REGIONAL BY ROW tables with a computed
region column that depends on the primary key. In this case,
uniqueness checks are never needed on the primary key, since
uniqueness is already guaranteed by the primary index.

Fixes #57720

Release justification: This commit is a low-risk, high-benefit
update to new functionality.

Release note (performance improvement): Removed uniqueness checks
on the primary key for REGIONAL BY ROW tables with a computed
region column that is a function of the primary key columns.
Uniqueness checks are not necessary in this case since uniqueness
is already guaranteed by the primary index. Removing these checks
improves the performance of INSERT, UPDATE, and UPSERT statements.
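For illustration, this is the shape of table that benefits, adapted from a
comment added in this commit (the region names and CASE expression are only
for the example):

  CREATE TABLE tab (
    k INT PRIMARY KEY,
    region crdb_internal_region AS (
      CASE WHEN k < 10 THEN 'us-east1' ELSE 'us-west1' END
    ) STORED
  ) LOCALITY REGIONAL BY ROW AS region;

Because the region column is implicitly prepended to every index, the primary
index is effectively on (region, k); since region is computed from k alone,
uniqueness of (region, k) implies uniqueness of k, and no separate check on k
is needed.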
rytaft committed Feb 25, 2021
1 parent 1ab9adb commit 1e795d1
Showing 8 changed files with 538 additions and 11 deletions.
48 changes: 48 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/regional_by_row
@@ -1035,6 +1035,54 @@
pk a b crdb_region_col
statement error cannot drop column crdb_region_col as it is used to store the region in a REGIONAL BY ROW table\nHINT: You must change the table locality before dropping this table
ALTER TABLE regional_by_row_table_as DROP COLUMN crdb_region_col

# We do not need uniqueness checks on pk since uniqueness can be inferred
# through the functional dependency between pk and the computed region column.
query T
EXPLAIN INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (1, 1, 1)
----
distribution: local
vectorized: true
·
• root
├── • insert
│ │ into: regional_by_row_table_as(pk, a, b, crdb_region_col)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • values
│ size: 5 columns, 1 row
└── • constraint-check
└── • error if rows
└── • lookup join (semi)
│ table: regional_by_row_table_as@regional_by_row_table_as_b_key
│ equality: (lookup_join_const_col_@21, column3) = (crdb_region_col,b)
│ equality cols are key
│ pred: (column1 != pk) OR (column10 != crdb_region_col)
└── • cross join
├── • values
│ size: 1 column, 3 rows
└── • scan buffer
label: buffer 1

# TODO(mgartner): Update this error message to remove crdb_region (see #59504).
statement error pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(crdb_region_col,pk\)=\('us-east-1',1\) already exists\.
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (1, 1, 1)

statement ok
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (30, 1, 1)

statement error pq: duplicate key value violates unique constraint "regional_by_row_table_as_b_key"\nDETAIL: Key \(b\)=\(1\) already exists\.
INSERT INTO regional_by_row_table_as (pk, a, b) VALUES (2, 1, 1)


# Tests for altering the survivability of a REGIONAL BY ROW table.
statement ok
CREATE DATABASE alter_survive_db PRIMARY REGION "us-east-1" REGIONS "ca-central-1", "ap-southeast-2" SURVIVE REGION FAILURE
78 changes: 78 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/unique
Expand Up @@ -86,6 +86,23 @@ CREATE TABLE uniq_enum (
UNIQUE WITHOUT INDEX (s, j)
)

statement ok
CREATE TABLE uniq_computed_pk (
i INT,
s STRING,
d DECIMAL,
c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
c_s STRING AS (s) VIRTUAL,
c_d DECIMAL AS (d) STORED,
c_d_expr STRING AS (d::string) STORED,
PRIMARY KEY (c_i_expr, i),
UNIQUE (c_s, s),
UNIQUE (c_d_expr, d),
UNIQUE WITHOUT INDEX (i),
UNIQUE WITHOUT INDEX (s),
UNIQUE WITHOUT INDEX (d)
)

statement ok
CREATE TABLE other (k INT, v INT, w INT NOT NULL, x INT, y INT)

@@ -322,6 +339,28 @@
a b
NULL 5
NULL 5

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement ok
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0), (2, 'b', 2.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',1\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'c', 3.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('b','b'\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (3, 'b', 3.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(1\.00\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (3, 'c', 1.00)

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with UPDATE --
subtest Update
@@ -475,6 +514,25 @@
NULL 5
NULL 5
NULL 10

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',1\) already exists\.
UPDATE uniq_computed_pk SET i = 1 WHERE i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('a','a'\) already exists\.
UPDATE uniq_computed_pk SET s = 'a' WHERE i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(1\.00\) already exists\.
UPDATE uniq_computed_pk SET d = 1.00 WHERE i = 2

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with UPSERT --
subtest Upsert
@@ -670,6 +728,26 @@
SELECT * FROM uniq_partial_index_and_constraint
i
2

# Check that uniqueness violations are detected in a table with UNIQUE indexes
# containing computed columns that are dependent on UNIQUE WITHOUT INDEX
# columns.
statement error pgcode 23505 pq: duplicate key value violates unique constraint "primary"\nDETAIL: Key \(c_i_expr,i\)=\('bar',2\) already exists\.
INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0) ON CONFLICT (s) DO UPDATE SET i = 2

statement error pgcode 23505 pq: duplicate key value violates unique constraint "uniq_computed_pk_c_s_s_key"\nDETAIL: Key \(c_s,s\)=\('b','b'\) already exists\.
UPSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'b', 1.0)

statement error pgcode 23505 pq: duplicate key value violates unique constraint "unique_d"\nDETAIL: Key \(d\)=\(2\.00\) already exists\.
UPSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 2.00)

query ITFTTFT colnames,rowsort
SELECT * FROM uniq_computed_pk
----
i s d c_i_expr c_s c_d c_d_expr
1 a 1.0 bar a 1.0 1.0
2 b 2.0 bar b 2.0 2.0


# -- Tests with DELETE --
subtest Delete

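An aside on the unique_d failures above (a sketch, not part of the test file):
DECIMAL comparison ignores trailing zeros, so 1.00 collides with the existing
1.0 even though the two values print differently:

  SELECT 1.0::DECIMAL = 1.00::DECIMAL;  -- true: equal values, distinct representations

This "equal but distinguishable" behavior is what the execbuilder tests below
call composite sensitivity.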
64 changes: 62 additions & 2 deletions pkg/sql/opt/exec/execbuilder/testdata/unique
@@ -110,8 +110,6 @@
CREATE TABLE uniq_partial_hidden_pk (
statement ok
CREATE TYPE region AS ENUM ('us-east', 'us-west', 'eu-west')

# TODO(rytaft): When more of the multi-region syntax is supported,
# add it here.
statement ok
CREATE TABLE uniq_enum (
r region DEFAULT CASE (random()*3)::int WHEN 0 THEN 'us-east' WHEN 1 THEN 'us-west' ELSE 'eu-west' END,
@@ -202,6 +200,29 @@
ALTER TABLE uniq_partial_enum INJECT STATISTICS '[
}
]'

statement ok
CREATE TABLE uniq_computed_pk (
i INT,
s STRING,
d DECIMAL,
c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
c_s STRING AS (s) VIRTUAL,
c_d DECIMAL AS (d) STORED,
c_d_expr STRING AS (d::string) STORED,
PRIMARY KEY (c_i_expr, i),
UNIQUE (c_s, s),
UNIQUE (c_d_expr, d),
UNIQUE WITHOUT INDEX (i),
UNIQUE WITHOUT INDEX (s),
UNIQUE WITHOUT INDEX (d),
FAMILY (i),
FAMILY (s),
FAMILY (d),
FAMILY (c_i_expr),
FAMILY (c_d),
FAMILY (c_d_expr)
)

statement ok
CREATE TABLE other (k INT, v INT, w INT NOT NULL, x INT, y INT)

@@ -1641,6 +1662,45 @@
vectorized: true
columns: (column1, column2, column3, column4, check1, partial_index_put1)
label: buffer 1

# We can eliminate the uniqueness checks for i and s due to functional
# dependencies. We cannot eliminate the check for d, since the functional
# dependency from d to c_d_expr cannot be inferred: the expression d::string
# is composite-sensitive.
query T
EXPLAIN INSERT INTO uniq_computed_pk (i, s, d) VALUES (1, 'a', 1.0), (2, 'b', 2.0)
----
distribution: local
vectorized: true
·
• root
├── • insert
│ │ into: uniq_computed_pk(i, s, d, c_i_expr, c_s, c_d, c_d_expr)
│ │
│ └── • buffer
│ │ label: buffer 1
│ │
│ └── • render
│ │
│ └── • values
│ size: 3 columns, 2 rows
└── • constraint-check
└── • error if rows
└── • hash join (right semi)
│ equality: (d) = (column3)
│ pred: (column1 != i) OR (column13 != c_i_expr)
├── • scan
│ missing stats
│ table: uniq_computed_pk@uniq_computed_pk_c_d_expr_d_key
│ spans: FULL SCAN
└── • scan buffer
label: buffer 1


# -- Tests with UPDATE --
subtest Update

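A quick demonstration of that composite sensitivity (a sketch, independent of
the test file): casting to STRING distinguishes values that compare equal as
DECIMALs, so d does not functionally determine d::string:

  SELECT 1.0::DECIMAL::STRING, 1.00::DECIMAL::STRING;  -- '1.0', '1.00'

Two rows could therefore agree on d yet disagree on c_d_expr, so the unique
index on (c_d_expr, d) cannot stand in for the uniqueness check on d alone.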
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/testdata/logprops/scan
@@ -399,6 +399,9 @@
index-join c
└── interesting orderings: (+1) (+3,+1)

# Test FDs for computed columns.
# We add equivalencies s=c_s and d=c_d, a strict dependency i->c_i_expr, and
# no dependency d->c_d_expr since the expression d::string is composite-
# sensitive.
exec-ddl
CREATE TABLE computed (
i INT,
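To spell out the dependencies named in that comment, here is a sketch using
the same column definitions as uniq_computed_pk from the logic tests above
(the table name is hypothetical):

  CREATE TABLE computed_fds (
    i INT,
    s STRING,
    d DECIMAL,
    c_i_expr STRING AS (CASE WHEN i < 0 THEN 'foo' ELSE 'bar' END) STORED,
    c_s STRING AS (s) VIRTUAL,
    c_d DECIMAL AS (d) STORED,
    c_d_expr STRING AS (d::string) STORED,
    PRIMARY KEY (c_i_expr, i)
  );
  -- FDs derived for a scan of this table:
  --   s == c_s, d == c_d   (equivalences: each computed column is a bare column reference)
  --   i --> c_i_expr       (strict dependency: a scalar expression of i alone)
  --   no d --> c_d_expr    (d::string is composite-sensitive, e.g. 1.0 vs 1.00)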
59 changes: 50 additions & 9 deletions pkg/sql/opt/optbuilder/mutation_builder_unique.go
@@ -153,6 +153,11 @@
type uniqueCheckHelper struct {
// primaryKeyOrdinals includes the ordinals from any primary key columns
// that are not included in uniqueOrdinals.
primaryKeyOrdinals util.FastIntSet

// The scope and column ordinals of the scan that will serve as the right
// side of the semi join for the uniqueness checks.
scanScope *scope
scanOrdinals []int
}

// init initializes the helper with a unique constraint.
@@ -179,7 +184,7 @@
func (h *uniqueCheckHelper) init(mb *mutationBuilder, uniqueOrdinal int) bool {
// with columns that are a subset of the unique constraint columns.
// Similarly, we don't need a check for a partial unique constraint if there
// exists a non-partial unique constraint with columns that are a subset of
// the partial unique constrain columns.
// the partial unique constraint columns.
primaryOrds := getIndexLaxKeyOrdinals(mb.tab.Index(cat.PrimaryIndex))
primaryOrds.DifferenceWith(uniqueOrds)
if primaryOrds.Empty() {
@@ -203,7 +208,44 @@
func (h *uniqueCheckHelper) init(mb *mutationBuilder, uniqueOrdinal int) bool {

// If at least one unique column is getting a NULL value, unique check not
// needed.
return numNullCols == 0
if numNullCols != 0 {
return false
}

// Build the scan that will serve as the right side of the semi join in the
// uniqueness check. We need to build the scan now so that we can use its
// FDs below.
h.scanScope, h.scanOrdinals = h.buildTableScan()

// Check that the columns in the unique constraint aren't already known to
// form a lax key. This can happen if there is a unique index on a superset of
// these columns, where all other columns are computed columns that depend
// only on our columns. This is especially important for multi-region tables
// when the region column is computed.
//
// For example:
//
// CREATE TABLE tab (
// k INT PRIMARY KEY,
// region crdb_internal_region AS (
// CASE WHEN k < 10 THEN 'us-east1' ELSE 'us-west1' END
// ) STORED
// ) LOCALITY REGIONAL BY ROW AS region
//
// Because this is a REGIONAL BY ROW table, the region column is implicitly
// added to the front of every index, including the primary index. As a
// result, we would normally need to add a uniqueness check to all mutations
// to ensure that the primary key column (k in this case) remains unique.
// However, because the region column is computed and depends only on k, the
// presence of the unique index on (region, k) (i.e., the primary index) is
// sufficient to guarantee the uniqueness of k.
var uniqueCols opt.ColSet
h.uniqueOrdinals.ForEach(func(ord int) {
colID := h.scanScope.cols[ord].id
uniqueCols.Add(colID)
})
fds := &h.scanScope.expr.Relational().FuncDeps
return !fds.ColsAreLaxKey(uniqueCols)
}

// buildInsertionCheck creates a unique check for rows which are added to a
@@ -214,10 +256,9 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {

// Build a self semi-join, with the new values on the left and the
// existing values on the right.
scanScope, ordinals := h.buildTableScan()

withScanScope, _ := h.mb.buildCheckInputScan(
checkInputScanNewVals, ordinals,
checkInputScanNewVals, h.scanOrdinals,
)

// Build the join filters:
@@ -238,7 +279,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(
f.ConstructEq(
f.ConstructVariable(withScanScope.cols[i].id),
f.ConstructVariable(scanScope.cols[i].id),
f.ConstructVariable(h.scanScope.cols[i].id),
),
))
}
@@ -255,8 +296,8 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
withScanPred := h.mb.b.buildScalar(typedPred, withScanScope, nil, nil, nil)
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(withScanPred))

typedPred = scanScope.resolveAndRequireType(pred, types.Bool)
scanPred := h.mb.b.buildScalar(typedPred, scanScope, nil, nil, nil)
typedPred = h.scanScope.resolveAndRequireType(pred, types.Bool)
scanPred := h.mb.b.buildScalar(typedPred, h.scanScope, nil, nil, nil)
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(scanPred))
}

@@ -268,7 +309,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
for i, ok := h.primaryKeyOrdinals.Next(0); ok; i, ok = h.primaryKeyOrdinals.Next(i + 1) {
pkFilterLocal := f.ConstructNe(
f.ConstructVariable(withScanScope.cols[i].id),
f.ConstructVariable(scanScope.cols[i].id),
f.ConstructVariable(h.scanScope.cols[i].id),
)
if pkFilter == nil {
pkFilter = pkFilterLocal
@@ -278,7 +319,7 @@
func (h *uniqueCheckHelper) buildInsertionCheck() memo.UniqueChecksItem {
}
semiJoinFilters = append(semiJoinFilters, f.ConstructFiltersItem(pkFilter))

semiJoin := f.ConstructSemiJoin(withScanScope.expr, scanScope.expr, semiJoinFilters, memo.EmptyJoinPrivate)
semiJoin := f.ConstructSemiJoin(withScanScope.expr, h.scanScope.expr, semiJoinFilters, memo.EmptyJoinPrivate)

// Collect the key columns that will be shown in the error message if there
// is a duplicate key violation resulting from this uniqueness check.