Skip to content

Commit

Permalink
sql: consider virtual PK columns as stored in the optimizer
Browse files Browse the repository at this point in the history
Allowing virtual primary key columns created a contradiction of two
hard-coded assumptions of the optimizer:

  1. Primary key columns are written to the primary index (and all
     secondary indexes).
  2. Virtual columns are not written to the primary index.

To prevent this contradiction from causing bugs while planning, the opt
catalog presents columns that are marked as virtual PK columns in the
descriptor as stored columns to the optimizer. This is valid because
virtual computed PK columns behave exactly like stored computed PK
columns: they are written to all indexes.

The test catalog has been updated to mimic this behavior.

Fixes cockroachdb#75147

Release note (bug fix): A bug has been fixed that caused internal errors
when querying tables with virtual columns in the primary key. This bug
was only present since version 22.1.0-alpha.1 and does not appear in any
production releases.
  • Loading branch information
mgartner authored and RajivTS committed Mar 6, 2022
1 parent 0e3b73f commit 0c1c147
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 77 deletions.
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ statement ok
SET experimental_enable_hash_sharded_indexes = true

# Tests for creating a hash sharded primary key
statement ok
statement ok
CREATE TABLE sharded_primary (a INT PRIMARY KEY USING HASH WITH BUCKET_COUNT = 10)

query TT
Expand Down Expand Up @@ -359,7 +359,7 @@ ALTER TABLE sharded_secondary ADD COLUMN c INT AS (mod(a, 100)) STORED
statement error cannot create a sharded index on a computed column
CREATE INDEX ON sharded_secondary (a, c) USING HASH WITH BUCKET_COUNT=12;

# Ensure that sharded indexes cannot be created on computed columns
# Ensure that sharded indexes cannot be created on computed columns
# in the same txn
statement error cannot create a sharded index on a computed column
CREATE TABLE shard_on_computed_column (
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/virtual_columns
Original file line number Diff line number Diff line change
Expand Up @@ -1241,3 +1241,25 @@ query ITI
SELECT * FROM t73372
----
0 foo 0

# Regression test for #75147. The optimizer should consider virtual PK columns
# as stored to prevent infinite recursion during query planning.
statement ok
CREATE TABLE t75147 (
a INT,
b INT,
c INT,
v1 INT AS (c) VIRTUAL,
v2 INT AS (c) VIRTUAL,
PRIMARY KEY (b, v1, v2),
INDEX (a)
);

statement ok
SELECT 'foo'
FROM t75147 AS t1
JOIN t75147 AS t2 ON
t1.v2 = t2.v2
AND t1.v1 = t2.v1
AND t1.b = t2.b
JOIN t75147 AS t3 ON t1.a = t3.a;
112 changes: 49 additions & 63 deletions pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,16 @@ query T
EXPLAIN (OPT, CATALOG) SELECT * FROM t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
├── PRIMARY INDEX t_pkey
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
│ └── a int not null
└── UNIQUE WITHOUT INDEX (a)
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -175,16 +175,16 @@ query T
EXPLAIN (OPT, CATALOG) SELECT * FROM t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
├── PRIMARY INDEX t_pkey
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
│ └── a int not null
└── UNIQUE WITHOUT INDEX (a)
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -239,12 +239,12 @@ vectorized: true
│ render check1: true
│ render crdb_internal_a_shard_8_cast: 1
│ render a_new: 4321
│ render crdb_internal_a_shard_8: mod(fnv32(crdb_internal.datums_to_bytes(a)), 8)
│ render crdb_internal_a_shard_8: crdb_internal_a_shard_8
│ render a: a
│ render b: b
└── • scan
columns: (a, b)
columns: (crdb_internal_a_shard_8, a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /6/1234/0
Expand Down Expand Up @@ -325,18 +325,12 @@ vectorized: true
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • render
│ columns: (crdb_internal_a_shard_8, a, b)
│ estimated row count: 1 (missing stats)
│ render crdb_internal_a_shard_8: mod(fnv32(crdb_internal.datums_to_bytes(a)), 8)
│ render a: a
│ render b: b
└── • scan
columns: (a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
└── • scan
columns: (crdb_internal_a_shard_8, a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
locking strength: for update

# TODO (issue #75498): The `lookup join` in this test output is unnecessary
# since we're using unique without index on (a) as an arbiter and using the
Expand All @@ -356,40 +350,36 @@ vectorized: true
│ arbiter constraints: t_hash_indexed_pkey
└── • render
│ columns: (crdb_internal_a_shard_8_cast, column1, column2, check1)
│ estimated row count: 0 (missing stats)
│ render check1: crdb_internal_a_shard_8_cast IN (0, 1, 2, 3, 4, 5, 6, 7)
│ render column1: column1
│ render column2: column2
│ render crdb_internal_a_shard_8_cast: crdb_internal_a_shard_8_cast
└── • lookup join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@t_hash_indexed_pkey
│ equality cols are key
│ lookup condition: (column1 = a) AND (crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7))
└── • cross join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
├── • values
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ size: 3 columns, 1 row
│ row 0, expr 0: 4321
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • project
│ columns: ()
│ estimated row count: 1 (missing stats)
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
│ columns: (crdb_internal_a_shard_8_cast, column1, column2, check1)
│ estimated row count: 0 (missing stats)
│ render check1: crdb_internal_a_shard_8_cast IN (0, 1, 2, 3, 4, 5, 6, 7)
│ render column1: column1
│ render column2: column2
│ render crdb_internal_a_shard_8_cast: crdb_internal_a_shard_8_cast
└── • lookup join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@t_hash_indexed_pkey
│ equality cols are key
│ lookup condition: (column1 = a) AND (crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7))
└── • cross join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
├── • values
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ size: 3 columns, 1 row
│ row 0, expr 0: 4321
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • scan
columns: (crdb_internal_a_shard_8, a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0

query T
EXPLAIN (VERBOSE) INSERT INTO t_hash_indexed VALUES (4321, 8765) ON CONFLICT (a) DO NOTHING
Expand Down Expand Up @@ -423,15 +413,11 @@ vectorized: true
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • project
│ columns: ()
│ estimated row count: 1 (missing stats)
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0

# Test to make sure unqiueness checks are omitted for unique without index
# constraints that are derived from hash-sharded indexes on secondary index.
Expand Down
9 changes: 7 additions & 2 deletions pkg/sql/opt/testutils/testcat/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {

tab := &Table{TabID: tc.nextStableID(), TabName: stmt.Table, Catalog: tc}

// Find the PK columns; we have to force these to be non-nullable.
// Find the PK columns.
pkCols := make(map[tree.Name]struct{})
for _, def := range stmt.Defs {
switch def := def.(type) {
Expand All @@ -100,7 +100,11 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {
case *tree.ColumnTableDef:
if !isMutationColumn(def) {
if _, isPKCol := pkCols[def.Name]; isPKCol {
// Force PK columns to be non-nullable and non-virtual.
def.Nullable.Nullability = tree.NotNull
if def.Computed.Computed {
def.Computed.Virtual = false
}
}
tab.addColumn(def)
}
Expand Down Expand Up @@ -714,7 +718,8 @@ func (tt *Table) addIndexWithVersion(
tt.deleteOnlyIdxCount++
}

// Add explicit columns and mark primary key columns as not null.
// Add explicit columns. Primary key columns definitions have already been
// updated to be non-nullable and non-virtual.
// Add the geoConfig if applicable.
idx.ExplicitColCount = len(def.Columns)
notNullIndex := true
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/opt/testutils/testcat/testdata/table
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,24 @@ TABLE generated_as_identity_seq_opt
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX generated_as_identity_seq_opt_pkey
└── rowid int not null default (unique_rowid()) [hidden]

# Virtual column in primary key is treated as a stored column in the optimizer.
exec-ddl
CREATE TABLE virtpk (
a INT,
v INT AS (a + 10) VIRTUAL,
PRIMARY KEY (v, a)
)
----

exec-ddl
SHOW CREATE virtpk
----
TABLE virtpk
├── a int not null
├── v int not null as (a + 10) stored
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX virtpk_pkey
├── v int not null as (a + 10) stored
└── a int not null
21 changes: 11 additions & 10 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
Expand Down Expand Up @@ -660,7 +659,10 @@ func newOptTable(
zone: tblZone,
}

// First, determine how many columns we will potentially need.
// Determine the primary key columns.
pkCols := desc.GetPrimaryIndex().CollectKeyColumnIDs()

// Determine how many columns we will potentially need.
cols := ot.desc.DeletableColumns()
numCols := len(ot.desc.AllColumns())
// Add one for each inverted index column.
Expand Down Expand Up @@ -690,7 +692,10 @@ func newOptTable(
kind = cat.DeleteOnly
visibility = cat.Inaccessible
}
if !col.IsVirtual() {
// Primary key columns that are virtual in the descriptor are considered
// "stored" from the perspective of the optimizer because they are
// written to the primary index and all secondary indexes.
if !col.IsVirtual() || pkCols.Contains(col.GetID()) {
ot.columns[col.Ordinal()].Init(
col.Ordinal(),
cat.StableID(col.GetID()),
Expand Down Expand Up @@ -1237,15 +1242,11 @@ func (oi *optIndex) init(
// descriptor does not contain columns that are not explicitly part of the
// primary key. Retrieve those columns from the table descriptor.
oi.storedCols = make([]descpb.ColumnID, 0, tab.ColumnCount()-idx.NumKeyColumns())
var pkCols util.FastIntSet
for i := 0; i < idx.NumKeyColumns(); i++ {
id := idx.GetKeyColumnID(i)
pkCols.Add(int(id))
}
pkCols := idx.CollectKeyColumnIDs()
for i, n := 0, tab.ColumnCount(); i < n; i++ {
if col := tab.Column(i); col.Kind() != cat.Inverted && !col.IsVirtualComputed() {
if id := col.ColID(); !pkCols.Contains(int(id)) {
oi.storedCols = append(oi.storedCols, descpb.ColumnID(id))
if id := descpb.ColumnID(col.ColID()); !pkCols.Contains(id) {
oi.storedCols = append(oi.storedCols, id)
}
}
}
Expand Down

0 comments on commit 0c1c147

Please sign in to comment.