Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: plan inner lookup joins on virtual column indexes in more cases #76078

Merged
merged 1 commit into from
Feb 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/expression_index
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# LogicTest: local

statement ok
CREATE TABLE mn (
k INT PRIMARY KEY,
m INT,
n STRING,
FAMILY (k, m, n)
)

statement ok
CREATE TABLE t (
k INT PRIMARY KEY,
Expand Down Expand Up @@ -87,3 +95,58 @@ SELECT * FROM [
missing stats
table: t@t_lower_c_a_plus_b_idx
spans: [/'foo'/111 - /'foo']

# Lookup joins can be planned on expression indexes.
# TODO(mgartner): We must drop these indexes to be able to plan a lookup join.
# This is required because of a limitation of the normalization rule
# ExtractJoinEqualities: it can only choose a single virtual column to project
# from the children of the join. When there are multiple expression indexes with
# the same expression, we create multiple virtual columns with the same
# expression in the table descriptor. If ExtractJoinEqualities picks the wrong
# virtual column to project, GenerateLookupJoinsWithVirtualCols will fail to
# generate a lookup join. This can be solved by making the opt catalog present
# all expression index virtual columns with the same expressions as a single
# virtual column.
statement ok
DROP INDEX t_a_plus_b_idx;
DROP INDEX t_lower_c_idx

query T
SELECT * FROM [
EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON n = lower(c)
] OFFSET 2
----
·
• lookup join
│ table: t@t_pkey
│ equality: (k) = (k)
│ equality cols are key
└── • lookup join
│ table: t@t_lower_c_a_plus_b_idx
│ equality: (n) = (crdb_internal_idx_expr_2)
└── • scan
missing stats
table: mn@mn_pkey
spans: FULL SCAN

query T
SELECT * FROM [
EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON m = a + b AND n = lower(c)
] OFFSET 2
----
·
• lookup join
│ table: t@t_pkey
│ equality: (k) = (k)
│ equality cols are key
└── • lookup join
│ table: t@t_lower_c_a_plus_b_idx
│ equality: (n, m) = (crdb_internal_idx_expr_2,crdb_internal_idx_expr_3)
└── • scan
missing stats
table: mn@mn_pkey
spans: FULL SCAN
23 changes: 15 additions & 8 deletions pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,8 @@ func (c *CustomFuncs) ExtractJoinEquality(
}

var leftProj, rightProj projectBuilder
leftProj.init(c.f)
rightProj.init(c.f)
leftProj.init(c, leftCols)
rightProj.init(c, rightCols)

newFilters := make(memo.FiltersExpr, len(filters))
for i := range filters {
Expand All @@ -540,19 +540,26 @@ func (c *CustomFuncs) ExtractJoinEquality(
c.f.ConstructEq(leftProj.add(a), rightProj.add(b)),
)
}
if leftProj.empty() && rightProj.empty() {
panic(errors.AssertionFailedf("no equalities to extract"))
}

join := c.f.ConstructJoin(
joinOp,
leftProj.buildProject(left, leftCols),
rightProj.buildProject(right, rightCols),
leftProj.buildProject(left),
rightProj.buildProject(right),
newFilters,
private,
)

// Project away the synthesized columns.
if leftProj.empty() && rightProj.empty() {
// If no new projections were created, then there are no synthesized
// columns to project away, so we can return the join. This is possible
// when the expressions added to leftProj and rightProj are identical to
// computed columns that are already output by the left and right
// inputs. There's no need to re-project these expressions, so
// projectBuilder will simply pass them through.
return join
}

// Otherwise, project away the synthesized columns.
outputCols := leftCols
if joinOp != opt.SemiJoinOp && joinOp != opt.AntiJoinOp {
// Semi/Anti join only produce the left side columns. All other join types
Expand Down
63 changes: 50 additions & 13 deletions pkg/sql/opt/norm/project_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,23 @@ import (
// input with new synthesized and passthrough columns. Sample usage:
//
// var pb projectBuilder
// pb.init(c)
// pb.init(c, passthrough)
// e1 := pb.add(some expression)
// e2 := pb.add(some other expression)
// augmentedInput := pb.buildProject(input, passthrough)
// augmentedInput := pb.buildProject(input)
// // e1 and e2 are VariableOp expressions, with input columns
// // produced by augmentedInput.
//
type projectBuilder struct {
f *Factory
c *CustomFuncs
projections memo.ProjectionsExpr
passthrough opt.ColSet
}

func (pb *projectBuilder) init(f *Factory) {
func (pb *projectBuilder) init(c *CustomFuncs, passthrough opt.ColSet) {
// This initialization pattern ensures that fields are not unwittingly
// reused. Field reuse must be explicit.
*pb = projectBuilder{f: f}
*pb = projectBuilder{c: c, passthrough: passthrough}
}

// empty returns true if there are no synthesized columns (and hence a
Expand All @@ -43,26 +44,62 @@ func (pb *projectBuilder) empty() bool {
return len(pb.projections) == 0
}

// add incorporates the given expression as a projection, unless the expression
// is already a "bare" variable. Returns a bare variable expression referring to
// the synthesized column.
// add attempts to incorporate the given expression as a projection. If the
// expression is already a "bare" variable, no projection is created and the
// variable is returned. If the expression matches a computed column expression
// in a base table, then the computed column is used as the projection column
// and is returned. If this computed column already exists as a passthrough
// column, then no projection is added. If the expression is not a "bare"
// variable and does not match a computed column expression, a new column is
// synthesized, a projection is created, and the synthesized column is returned.
func (pb *projectBuilder) add(e opt.ScalarExpr) opt.ScalarExpr {
if v, ok := e.(*memo.VariableExpr); ok {
// The expression is a bare variable; we don't need to synthesize a column.
return v
}

newCol := pb.f.Metadata().AddColumn("", e.DataType())
pb.projections = append(pb.projections, pb.f.ConstructProjectionsItem(e, newCol))
return pb.f.ConstructVariable(newCol)
// Look for a computed column in a base table with an identical expression.
// If one exists, we can use it as the projection column instead of
// synthesizing a new column.
var projectCol opt.ColumnID
if cols := pb.c.OuterCols(e); !cols.Empty() {
// Get the base table of the first column referenced in e.
col, _ := cols.Next(0)
if tabID := pb.c.f.Metadata().ColumnMeta(col).Table; tabID != 0 {
// If the column has a base table (i.e., it is not a synthesized
// column), search for a computed column expression in the base
// table identical to e.
for compCol, expr := range pb.c.f.Metadata().TableMeta(tabID).ComputedCols {
if e == expr {
projectCol = compCol
break
}
}
}
}

// If the input to the Project already contains the computed column, then
// there is no need to re-project its expression.
if pb.passthrough.Contains(projectCol) {
return pb.c.f.ConstructVariable(projectCol)
}

// If we did not find an existing computed column with the same expression,
// synthesize a new column.
if projectCol == 0 {
projectCol = pb.c.f.Metadata().AddColumn("", e.DataType())
}

pb.projections = append(pb.projections, pb.c.f.ConstructProjectionsItem(e, projectCol))
return pb.c.f.ConstructVariable(projectCol)
}

// buildProject creates the ProjectOp (if needed). The ProjectOp passes through
// the passthrough columns given to init and adds the projected columns, which
// are either newly synthesized columns or reused computed columns.
func (pb *projectBuilder) buildProject(input memo.RelExpr, passthrough opt.ColSet) memo.RelExpr {
func (pb *projectBuilder) buildProject(input memo.RelExpr) memo.RelExpr {
if pb.empty() {
// Avoid creating a Project that does nothing and just gets elided.
return input
}
return pb.f.ConstructProject(input, pb.projections, passthrough)
return pb.c.f.ConstructProject(input, pb.projections, pb.passthrough)
}
70 changes: 69 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ CREATE TABLE uv (u INT PRIMARY KEY, v INT)
----

exec-ddl
CREATE TABLE booleans(a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
CREATE TABLE booleans (a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
----

exec-ddl
CREATE TABLE comp (i INT, c INT AS (i + 10) STORED, v INT AS (abs(i)) VIRTUAL)
----

norm
Expand Down Expand Up @@ -3199,6 +3203,70 @@ project
├── v:6 = (x:1 + u:5) [outer=(1,5,6), immutable, constraints=(/6: (/NULL - ]), fd=(1,5)-->(6)]
└── column9:9 = u:5 [outer=(5,9), constraints=(/5: (/NULL - ]; /9: (/NULL - ]), fd=(5)==(9), (9)==(5)]

# Computed columns with matching expressions should be reused as projection
# columns.
norm expect=ExtractJoinEqualities
SELECT * FROM xy JOIN comp ON x=i+10 AND y=abs(i)
----
inner-join (hash)
├── columns: x:1!null y:2!null i:5 c:6!null v:7!null
├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
├── immutable
├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── project
│ ├── columns: v:7 i:5 c:6
│ ├── immutable
│ ├── fd: (5)-->(6,7)
│ ├── scan comp
│ │ ├── columns: i:5 c:6
│ │ ├── computed column expressions
│ │ │ ├── c:6
│ │ │ │ └── i:5 + 10
│ │ │ └── v:7
│ │ │ └── abs(i:5)
│ │ └── fd: (5)-->(6)
│ └── projections
│ └── abs(i:5) [as=v:7, outer=(5), immutable]
└── filters
├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]

# Computed columns with matching expressions should also be reused as projection
# columns when the computed expression is the left operand of the equality.
norm expect=ExtractJoinEqualities
SELECT * FROM xy JOIN comp ON i+10=x AND abs(i)=y
----
inner-join (hash)
├── columns: x:1!null y:2!null i:5 c:6!null v:7!null
├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
├── immutable
├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── project
│ ├── columns: v:7 i:5 c:6
│ ├── immutable
│ ├── fd: (5)-->(6,7)
│ ├── scan comp
│ │ ├── columns: i:5 c:6
│ │ ├── computed column expressions
│ │ │ ├── c:6
│ │ │ │ └── i:5 + 10
│ │ │ └── v:7
│ │ │ └── abs(i:5)
│ │ └── fd: (5)-->(6)
│ └── projections
│ └── abs(i:5) [as=v:7, outer=(5), immutable]
└── filters
├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]

# Cases with non-extractable equality.
norm expect-not=ExtractJoinEqualities
SELECT * FROM xy FULL OUTER JOIN uv ON x=u
Expand Down
Loading