Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: consider virtual PK columns as stored in the optimizer #75898

Merged
merged 1 commit into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ statement ok
SET experimental_enable_hash_sharded_indexes = true

# Tests for creating a hash sharded primary key
statement ok
statement ok
CREATE TABLE sharded_primary (a INT PRIMARY KEY USING HASH WITH BUCKET_COUNT = 10)

query TT
Expand Down Expand Up @@ -359,7 +359,7 @@ ALTER TABLE sharded_secondary ADD COLUMN c INT AS (mod(a, 100)) STORED
statement error cannot create a sharded index on a computed column
CREATE INDEX ON sharded_secondary (a, c) USING HASH WITH BUCKET_COUNT=12;

# Ensure that sharded indexes cannot be created on computed columns
# Ensure that sharded indexes cannot be created on computed columns
# in the same txn
statement error cannot create a sharded index on a computed column
CREATE TABLE shard_on_computed_column (
Expand Down
22 changes: 22 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/virtual_columns
Original file line number Diff line number Diff line change
Expand Up @@ -1241,3 +1241,25 @@ query ITI
SELECT * FROM t73372
----
0 foo 0

# Regression test for #75147. The optimizer should consider virtual PK columns
# as stored to prevent infinite recursion during query planning.
statement ok
CREATE TABLE t75147 (
a INT,
b INT,
c INT,
v1 INT AS (c) VIRTUAL,
v2 INT AS (c) VIRTUAL,
PRIMARY KEY (b, v1, v2),
INDEX (a)
);

statement ok
SELECT 'foo'
FROM t75147 AS t1
JOIN t75147 AS t2 ON
t1.v2 = t2.v2
AND t1.v1 = t2.v1
AND t1.b = t2.b
JOIN t75147 AS t3 ON t1.a = t3.a;
112 changes: 49 additions & 63 deletions pkg/sql/opt/exec/execbuilder/testdata/hash_sharded_index
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,16 @@ query T
EXPLAIN (OPT, CATALOG) SELECT * FROM t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
├── PRIMARY INDEX t_pkey
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) virtual [hidden]
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32("crdb_internal.datums_to_bytes"(a)), 8:::INT8)) stored [hidden]
│ └── a int not null
└── UNIQUE WITHOUT INDEX (a)
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -175,16 +175,16 @@ query T
EXPLAIN (OPT, CATALOG) SELECT * FROM t
----
TABLE t
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
├── a int not null
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
├── CHECK (crdb_internal_a_shard_8 IN (0:::INT8, 1:::INT8, 2:::INT8, 3:::INT8, 4:::INT8, 5:::INT8, 6:::INT8, 7:::INT8))
├── PRIMARY INDEX t_pkey
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) virtual [hidden]
│ ├── crdb_internal_a_shard_8 int4 not null as (mod(fnv32(crdb_internal.datums_to_bytes(a)), 8:::INT8)) stored [hidden]
│ └── a int not null
└── UNIQUE WITHOUT INDEX (a)
scan t
scan t
├── check constraint expressions
│ └── crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7)
└── computed column expressions
Expand Down Expand Up @@ -239,12 +239,12 @@ vectorized: true
│ render check1: true
│ render crdb_internal_a_shard_8_cast: 1
│ render a_new: 4321
│ render crdb_internal_a_shard_8: mod(fnv32(crdb_internal.datums_to_bytes(a)), 8)
│ render crdb_internal_a_shard_8: crdb_internal_a_shard_8
│ render a: a
│ render b: b
└── • scan
columns: (a, b)
columns: (crdb_internal_a_shard_8, a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /6/1234/0
Expand Down Expand Up @@ -325,18 +325,12 @@ vectorized: true
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • render
│ columns: (crdb_internal_a_shard_8, a, b)
│ estimated row count: 1 (missing stats)
│ render crdb_internal_a_shard_8: mod(fnv32(crdb_internal.datums_to_bytes(a)), 8)
│ render a: a
│ render b: b
└── • scan
columns: (a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
└── • scan
columns: (crdb_internal_a_shard_8, a, b)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
locking strength: for update

# TODO (issue #75498): The `lookup join` in this test output is unnecessary
# since we're using unique without index on (a) as an arbiter and using the
Expand All @@ -356,40 +350,36 @@ vectorized: true
│ arbiter constraints: t_hash_indexed_pkey
└── • render
│ columns: (crdb_internal_a_shard_8_cast, column1, column2, check1)
│ estimated row count: 0 (missing stats)
│ render check1: crdb_internal_a_shard_8_cast IN (0, 1, 2, 3, 4, 5, 6, 7)
│ render column1: column1
│ render column2: column2
│ render crdb_internal_a_shard_8_cast: crdb_internal_a_shard_8_cast
└── • lookup join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@t_hash_indexed_pkey
│ equality cols are key
│ lookup condition: (column1 = a) AND (crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7))
└── • cross join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
├── • values
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ size: 3 columns, 1 row
│ row 0, expr 0: 4321
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • project
│ columns: ()
│ estimated row count: 1 (missing stats)
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
│ columns: (crdb_internal_a_shard_8_cast, column1, column2, check1)
│ estimated row count: 0 (missing stats)
│ render check1: crdb_internal_a_shard_8_cast IN (0, 1, 2, 3, 4, 5, 6, 7)
│ render column1: column1
│ render column2: column2
│ render crdb_internal_a_shard_8_cast: crdb_internal_a_shard_8_cast
└── • lookup join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
│ table: t_hash_indexed@t_hash_indexed_pkey
│ equality cols are key
│ lookup condition: (column1 = a) AND (crdb_internal_a_shard_8 IN (0, 1, 2, 3, 4, 5, 6, 7))
└── • cross join (anti)
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ estimated row count: 0 (missing stats)
├── • values
│ columns: (column1, column2, crdb_internal_a_shard_8_cast)
│ size: 3 columns, 1 row
│ row 0, expr 0: 4321
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • scan
columns: (crdb_internal_a_shard_8, a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0

query T
EXPLAIN (VERBOSE) INSERT INTO t_hash_indexed VALUES (4321, 8765) ON CONFLICT (a) DO NOTHING
Expand Down Expand Up @@ -423,15 +413,11 @@ vectorized: true
│ row 0, expr 1: 8765
│ row 0, expr 2: 1
└── • project
│ columns: ()
│ estimated row count: 1 (missing stats)
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0
└── • scan
columns: (a)
estimated row count: 1 (missing stats)
table: t_hash_indexed@t_hash_indexed_pkey
spans: /1/4321/0

# Test to make sure unqiueness checks are omitted for unique without index
# constraints that are derived from hash-sharded indexes on secondary index.
Expand Down
9 changes: 7 additions & 2 deletions pkg/sql/opt/testutils/testcat/create_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {

tab := &Table{TabID: tc.nextStableID(), TabName: stmt.Table, Catalog: tc}

// Find the PK columns; we have to force these to be non-nullable.
// Find the PK columns.
pkCols := make(map[tree.Name]struct{})
for _, def := range stmt.Defs {
switch def := def.(type) {
Expand All @@ -100,7 +100,11 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table {
case *tree.ColumnTableDef:
if !isMutationColumn(def) {
if _, isPKCol := pkCols[def.Name]; isPKCol {
// Force PK columns to be non-nullable and non-virtual.
def.Nullable.Nullability = tree.NotNull
if def.Computed.Computed {
def.Computed.Virtual = false
}
}
tab.addColumn(def)
}
Expand Down Expand Up @@ -714,7 +718,8 @@ func (tt *Table) addIndexWithVersion(
tt.deleteOnlyIdxCount++
}

// Add explicit columns and mark primary key columns as not null.
// Add explicit columns. Primary key columns definitions have already been
// updated to be non-nullable and non-virtual.
// Add the geoConfig if applicable.
idx.ExplicitColCount = len(def.Columns)
notNullIndex := true
Expand Down
21 changes: 21 additions & 0 deletions pkg/sql/opt/testutils/testcat/testdata/table
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,24 @@ TABLE generated_as_identity_seq_opt
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX generated_as_identity_seq_opt_pkey
└── rowid int not null default (unique_rowid()) [hidden]

# Virtual column in primary key is treated as a stored column in the optimizer.
exec-ddl
CREATE TABLE virtpk (
a INT,
v INT AS (a + 10) VIRTUAL,
PRIMARY KEY (v, a)
)
----

exec-ddl
SHOW CREATE virtpk
----
TABLE virtpk
├── a int not null
├── v int not null as (a + 10) stored
├── crdb_internal_mvcc_timestamp decimal [hidden] [system]
├── tableoid oid [hidden] [system]
└── PRIMARY INDEX virtpk_pkey
├── v int not null as (a + 10) stored
└── a int not null
21 changes: 11 additions & 10 deletions pkg/sql/opt_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/stats"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
Expand Down Expand Up @@ -660,7 +659,10 @@ func newOptTable(
zone: tblZone,
}

// First, determine how many columns we will potentially need.
// Determine the primary key columns.
pkCols := desc.GetPrimaryIndex().CollectKeyColumnIDs()

// Determine how many columns we will potentially need.
cols := ot.desc.DeletableColumns()
numCols := len(ot.desc.AllColumns())
// Add one for each inverted index column.
Expand Down Expand Up @@ -690,7 +692,10 @@ func newOptTable(
kind = cat.DeleteOnly
visibility = cat.Inaccessible
}
if !col.IsVirtual() {
// Primary key columns that are virtual in the descriptor are considered
// "stored" from the perspective of the optimizer because they are
// written to the primary index and all secondary indexes.
if !col.IsVirtual() || pkCols.Contains(col.GetID()) {
ot.columns[col.Ordinal()].Init(
col.Ordinal(),
cat.StableID(col.GetID()),
Expand Down Expand Up @@ -1237,15 +1242,11 @@ func (oi *optIndex) init(
// descriptor does not contain columns that are not explicitly part of the
// primary key. Retrieve those columns from the table descriptor.
oi.storedCols = make([]descpb.ColumnID, 0, tab.ColumnCount()-idx.NumKeyColumns())
var pkCols util.FastIntSet
for i := 0; i < idx.NumKeyColumns(); i++ {
id := idx.GetKeyColumnID(i)
pkCols.Add(int(id))
}
pkCols := idx.CollectKeyColumnIDs()
for i, n := 0, tab.ColumnCount(); i < n; i++ {
if col := tab.Column(i); col.Kind() != cat.Inverted && !col.IsVirtualComputed() {
if id := col.ColID(); !pkCols.Contains(int(id)) {
oi.storedCols = append(oi.storedCols, descpb.ColumnID(id))
if id := descpb.ColumnID(col.ColID()); !pkCols.Contains(id) {
oi.storedCols = append(oi.storedCols, id)
}
}
}
Expand Down