Skip to content

Commit

Permalink
sql: consider virtual PK columns as stored in the optimizer
Browse files Browse the repository at this point in the history
Allowing virtual primary key columns created a contradiction of two
hard-coded assumptions of the optimizer:

  1. Primary key columns are written to the primary index (and all
     secondary indexes).
  2. Virtual columns are not written to the primary index.

To prevent this contradiction from causing bugs while planning, the opt
catalog presents columns that are marked as virtual PK columns in the
descriptor as stored columns to the optimizer. This is valid because
virtual computed PK columns behave exactly like stored computed PK
columns: they are written to all indexes.

The test catalog has been updated to mimic this behavior.

Fixes cockroachdb#75147

Release note (bug fix): A bug has been fixed that caused internal errors
when querying tables with virtual columns in the primary key. This bug
was only present since version 22.1.0-alpha.1 and does not appear in any
production releases.
  • Loading branch information
mgartner committed Feb 3, 2022
1 parent d869341 commit 45bfbb2
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 21 deletions.
18 changes: 9 additions & 9 deletions pkg/sql/logictest/testdata/logic_test/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -801,15 +801,15 @@ query T
explain (opt, catalog) select * from t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
└── PRIMARY INDEX t_pkey
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
└── a int not null
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -839,15 +839,15 @@ query T
explain (opt, catalog) select * from t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
└── PRIMARY INDEX t_pkey
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
└── a int not null
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -883,15 +883,15 @@ query T
explain (opt, catalog) select * from t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
└── PRIMARY INDEX t_pkey
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
└── a int not null
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/virtual_columns
Original file line number Diff line number Diff line change
Expand Up @@ -1241,3 +1241,25 @@ query ITI
SELECT * FROM t73372
----
0 foo 0

# Regression test for #75147. The optimizer should consider virtual PK columns
# as stored to prevent infinite recursion during query planning.
statement ok
CREATE TABLE t75147 (
a INT,
b INT,
c INT,
v1 INT AS (c) VIRTUAL,
v2 INT AS (c) VIRTUAL,
PRIMARY KEY (b, v1, v2),
INDEX (a)
);

statement ok
SELECT 'foo'
FROM t75147 AS t1
JOIN t75147 AS t2 ON
t1.v2 = t2.v2
AND t1.v1 = t2.v1
AND t1.b = t2.b
JOIN t75147 AS t3 ON t1.a = t3.a;
9 changes: 7 additions & 2 deletions pkg/sql/opt/testutils/testcat/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {

tab := &Table{TabID: tc.nextStableID(), TabName: stmt.Table, Catalog: tc}

// Find the PK columns; we have to force these to be non-nullable.
// Find the PK columns.
pkCols := make(map[tree.Name]struct{})
for _, def := range stmt.Defs {
switch def := def.(type) {
Expand All @@ -100,7 +100,11 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {
case *tree.ColumnTableDef:
if !isMutationColumn(def) {
if _, isPKCol := pkCols[def.Name]; isPKCol {
// Force PK columns to be non-nullable and non-virtual.
def.Nullable.Nullability = tree.NotNull
if def.Computed.Computed {
def.Computed.Virtual = false
}
}
tab.addColumn(def)
}
Expand Down Expand Up @@ -714,7 +718,8 @@ func (tt *Table) addIndexWithVersion(
tt.deleteOnlyIdxCount++
}

// Add explicit columns and mark primary key columns as not null.
// Add explicit columns. Primary key columns definitions have already been
// updated to be non-nullable and non-virtual.
// Add the geoConfig if applicable.
idx.ExplicitColCount = len(def.Columns)
notNullIndex := true
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/opt/testutils/testcat/testdata/table
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,24 @@ TABLE generated_as_identity_seq_opt
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX generated_as_identity_seq_opt_pkey
└── rowid int not null default (unique_rowid()) [hidden]

# Virtual column in primary key is treated as a stored column in the optimizer.
exec-ddl
CREATE TABLE virtpk (
a INT,
v INT AS (a + 10) VIRTUAL,
PRIMARY KEY (v, a)
)
----

exec-ddl
SHOW CREATE virtpk
----
TABLE virtpk
├── a int not null
├── v int not null as (a + 10) stored
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX virtpk_pkey
├── v int not null as (a + 10) stored
└── a int not null
21 changes: 11 additions & 10 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
Expand Down Expand Up @@ -660,7 +659,10 @@ func newOptTable(
zone: tblZone,
}

// First, determine how many columns we will potentially need.
// Determine the primary key columns.
pkCols := desc.GetPrimaryIndex().CollectKeyColumnIDs()

// Determine how many columns we will potentially need.
cols := ot.desc.DeletableColumns()
numCols := len(ot.desc.AllColumns())
// Add one for each inverted index column.
Expand Down Expand Up @@ -690,7 +692,10 @@ func newOptTable(
kind = cat.DeleteOnly
visibility = cat.Inaccessible
}
if !col.IsVirtual() {
// Primary key columns that are virtual in the descriptor are considered
// "stored" from the perspective of the optimizer because they are
// written to the primary index and all secondary indexes.
if !col.IsVirtual() || pkCols.Contains(col.GetID()) {
ot.columns[col.Ordinal()].Init(
col.Ordinal(),
cat.StableID(col.GetID()),
Expand Down Expand Up @@ -1224,15 +1229,11 @@ func (oi *optIndex) init(
// descriptor does not contain columns that are not explicitly part of the
// primary key. Retrieve those columns from the table descriptor.
oi.storedCols = make([]descpb.ColumnID, 0, tab.ColumnCount()-idx.NumKeyColumns())
var pkCols util.FastIntSet
for i := 0; i < idx.NumKeyColumns(); i++ {
id := idx.GetKeyColumnID(i)
pkCols.Add(int(id))
}
pkCols := idx.CollectKeyColumnIDs()
for i, n := 0, tab.ColumnCount(); i < n; i++ {
if col := tab.Column(i); col.Kind() != cat.Inverted && !col.IsVirtualComputed() {
if id := col.ColID(); !pkCols.Contains(int(id)) {
oi.storedCols = append(oi.storedCols, descpb.ColumnID(id))
if id := descpb.ColumnID(col.ColID()); !pkCols.Contains(id) {
oi.storedCols = append(oi.storedCols, id)
}
}
}
Expand Down

0 comments on commit 45bfbb2

Please sign in to comment.