Skip to content

Commit

Permalink
opt: plan inner lookup joins on virtual column indexes in more cases
Browse files Browse the repository at this point in the history
ExtractJoinEqualities now reuses computed columns instead of
synthesizing new columns when it creates projections that exactly match
a computed column expression of a base table. This allows
GenerateLookupJoinsWithVirtualCols to generate lookup joins in more
cases. This also paves the way for exploring anti- and semi-lookup
joins on indexes with virtual columns and expression indexes.

Fixes #75872

Release note (performance improvement): The optimizer now plans inner
lookup joins using expression indexes in more cases, resulting in more
efficient query plans.
  • Loading branch information
mgartner committed Feb 4, 2022
1 parent 5304fed commit 56b5ce5
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 24 deletions.
63 changes: 63 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/expression_index
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# LogicTest: local

statement ok
CREATE TABLE mn (
k INT PRIMARY KEY,
m INT,
n STRING,
FAMILY (k, m, n)
)

statement ok
CREATE TABLE t (
k INT PRIMARY KEY,
Expand Down Expand Up @@ -87,3 +95,58 @@ SELECT * FROM [
missing stats
table: t@t_lower_c_a_plus_b_idx
spans: [/'foo'/111 - /'foo']

# Lookup joins can be planned on expression indexes.
# TODO(mgartner): We must drop these indexes to be able to plan a lookup join.
# This is required because of a limitation of the normalization rule
# ExtractJoinEqualities: it can only choose a single virtual column to project
# from the children of the join. When there are multiple expression indexes with
# the same expression, we create multiple virtual columns with the same
# expression in the table descriptor. If ExtractJoinEqualities picks the wrong
# virtual column to project, GenerateLookupJoinsWithVirtualCols will fail to
# generate a lookup join. This can be solved by making the opt catalog present
# all expression index virtual columns with the same expressions as a single
# virtual column.
statement ok
DROP INDEX t_a_plus_b_idx;
DROP INDEX t_lower_c_idx

query T
SELECT * FROM [
EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON n = lower(c)
] OFFSET 2
----
·
• lookup join
│ table: t@t_pkey
│ equality: (k) = (k)
│ equality cols are key
└── • lookup join
│ table: t@t_lower_c_a_plus_b_idx
│ equality: (n) = (crdb_internal_idx_expr_2)
└── • scan
missing stats
table: mn@mn_pkey
spans: FULL SCAN

query T
SELECT * FROM [
EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON m = a + b AND n = lower(c)
] OFFSET 2
----
·
• lookup join
│ table: t@t_pkey
│ equality: (k) = (k)
│ equality cols are key
└── • lookup join
│ table: t@t_lower_c_a_plus_b_idx
│ equality: (n, m) = (crdb_internal_idx_expr_2,crdb_internal_idx_expr_3)
└── • scan
missing stats
table: mn@mn_pkey
spans: FULL SCAN
23 changes: 15 additions & 8 deletions pkg/sql/opt/norm/join_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,8 @@ func (c *CustomFuncs) ExtractJoinEquality(
}

var leftProj, rightProj projectBuilder
leftProj.init(c.f)
rightProj.init(c.f)
leftProj.init(c, leftCols)
rightProj.init(c, rightCols)

newFilters := make(memo.FiltersExpr, len(filters))
for i := range filters {
Expand All @@ -540,19 +540,26 @@ func (c *CustomFuncs) ExtractJoinEquality(
c.f.ConstructEq(leftProj.add(a), rightProj.add(b)),
)
}
if leftProj.empty() && rightProj.empty() {
panic(errors.AssertionFailedf("no equalities to extract"))
}

join := c.f.ConstructJoin(
joinOp,
leftProj.buildProject(left, leftCols),
rightProj.buildProject(right, rightCols),
leftProj.buildProject(left),
rightProj.buildProject(right),
newFilters,
private,
)

// Project away the synthesized columns.
if leftProj.empty() && rightProj.empty() {
// If no new projections were created, then there are no synthesized
// columns to project away, so we can return the join. This is possible
// when the expressions added to leftProj and rightProj are identical to
// computed columns that are already output by left and right. There's no
// need to re-project these expressions, so projectBuilder will simply
// pass them through.
return join
}

// Otherwise, project away the synthesized columns.
outputCols := leftCols
if joinOp != opt.SemiJoinOp && joinOp != opt.AntiJoinOp {
// Semi/Anti join only produce the left side columns. All other join types
Expand Down
63 changes: 50 additions & 13 deletions pkg/sql/opt/norm/project_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,23 @@ import (
// input with new synthesized and passthrough columns. Sample usage:
//
// var pb projectBuilder
// pb.init(c)
// pb.init(c, passthrough)
// e1 := pb.add(some expression)
// e2 := pb.add(some other expression)
// augmentedInput := pb.buildProject(input, passthrough)
// augmentedInput := pb.buildProject(input)
// // e1 and e2 are VariableOp expressions, with input columns
// // produced by augmentedInput.
//
type projectBuilder struct {
f *Factory
c *CustomFuncs
projections memo.ProjectionsExpr
passthrough opt.ColSet
}

func (pb *projectBuilder) init(f *Factory) {
func (pb *projectBuilder) init(c *CustomFuncs, passthrough opt.ColSet) {
// This initialization pattern ensures that fields are not unwittingly
// reused. Field reuse must be explicit.
*pb = projectBuilder{f: f}
*pb = projectBuilder{c: c, passthrough: passthrough}
}

// empty returns true if there are no synthesized columns (and hence a
Expand All @@ -43,26 +44,62 @@ func (pb *projectBuilder) empty() bool {
return len(pb.projections) == 0
}

// add incorporates the given expression as a projection, unless the expression
// is already a "bare" variable. Returns a bare variable expression referring to
// the synthesized column.
// add attempts to incorporate the given expression as a projection. If the
// expression is already a "bare" variable, no projection is created and the
// variable is returned. If the expression matches a computed column expression
// in a base table, then the computed column is used as the projection column
// and is returned. If this computed column already exists as a passthrough
// column, then no projection is added. If the expression is not a "bare"
// variable and does not match a computed column expression, a new column is
// synthesized, a projection is created, and the synthesized column is returned.
func (pb *projectBuilder) add(e opt.ScalarExpr) opt.ScalarExpr {
if v, ok := e.(*memo.VariableExpr); ok {
// The expression is a bare variable; we don't need to synthesize a column.
return v
}

newCol := pb.f.Metadata().AddColumn("", e.DataType())
pb.projections = append(pb.projections, pb.f.ConstructProjectionsItem(e, newCol))
return pb.f.ConstructVariable(newCol)
// Look for a computed column in a base table with an identical expression.
// If one exists, we can use it as the projection column instead of
// synthesizing a new column.
var projectCol opt.ColumnID
if cols := pb.c.OuterCols(e); !cols.Empty() {
// Get the base table of the first column referenced in e.
col, _ := cols.Next(0)
if tabID := pb.c.f.Metadata().ColumnMeta(col).Table; tabID != 0 {
// If the column has a base table (i.e., it is not a synthesized
// column), search for a computed column expression in the base
// table identical to e.
for compCol, expr := range pb.c.f.Metadata().TableMeta(tabID).ComputedCols {
if e == expr {
projectCol = compCol
break
}
}
}
}

// If the input to the Project already contains the computed column, then
// there is no need to re-project its expression.
if pb.passthrough.Contains(projectCol) {
return pb.c.f.ConstructVariable(projectCol)
}

// If we did not find an existing computed column with the same expression,
// synthesize a new column.
if projectCol == 0 {
projectCol = pb.c.f.Metadata().AddColumn("", e.DataType())
}

pb.projections = append(pb.projections, pb.c.f.ConstructProjectionsItem(e, projectCol))
return pb.c.f.ConstructVariable(projectCol)
}

// buildProject creates the ProjectOp (if needed). The ProjectOp passes through
// the builder's passthrough columns and adds any synthesized columns.
func (pb *projectBuilder) buildProject(input memo.RelExpr, passthrough opt.ColSet) memo.RelExpr {
func (pb *projectBuilder) buildProject(input memo.RelExpr) memo.RelExpr {
if pb.empty() {
// Avoid creating a Project that does nothing and just gets elided.
return input
}
return pb.f.ConstructProject(input, pb.projections, passthrough)
return pb.c.f.ConstructProject(input, pb.projections, pb.passthrough)
}
70 changes: 69 additions & 1 deletion pkg/sql/opt/norm/testdata/rules/join
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ CREATE TABLE uv (u INT PRIMARY KEY, v INT)
----

exec-ddl
CREATE TABLE booleans(a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
CREATE TABLE booleans (a BOOL, b BOOL, c BOOL, d BOOL, e BOOL)
----

exec-ddl
CREATE TABLE comp (i INT, c INT AS (i + 10) STORED, v INT AS (abs(i)) VIRTUAL)
----

norm
Expand Down Expand Up @@ -3199,6 +3203,70 @@ project
├── v:6 = (x:1 + u:5) [outer=(1,5,6), immutable, constraints=(/6: (/NULL - ]), fd=(1,5)-->(6)]
└── column9:9 = u:5 [outer=(5,9), constraints=(/5: (/NULL - ]; /9: (/NULL - ]), fd=(5)==(9), (9)==(5)]

# Computed columns with matching expressions should be reused as projection
# columns.
norm expect=ExtractJoinEqualities
SELECT * FROM xy JOIN comp ON x=i+10 AND y=abs(i)
----
inner-join (hash)
├── columns: x:1!null y:2!null i:5 c:6!null v:7!null
├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
├── immutable
├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── project
│ ├── columns: v:7 i:5 c:6
│ ├── immutable
│ ├── fd: (5)-->(6,7)
│ ├── scan comp
│ │ ├── columns: i:5 c:6
│ │ ├── computed column expressions
│ │ │ ├── c:6
│ │ │ │ └── i:5 + 10
│ │ │ └── v:7
│ │ │ └── abs(i:5)
│ │ └── fd: (5)-->(6)
│ └── projections
│ └── abs(i:5) [as=v:7, outer=(5), immutable]
└── filters
├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]

# Computed columns with matching expressions should be reused as projection
# columns.
norm expect=ExtractJoinEqualities
SELECT * FROM xy JOIN comp ON i+10=x AND abs(i)=y
----
inner-join (hash)
├── columns: x:1!null y:2!null i:5 c:6!null v:7!null
├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one)
├── immutable
├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2)
├── scan xy
│ ├── columns: x:1!null y:2
│ ├── key: (1)
│ └── fd: (1)-->(2)
├── project
│ ├── columns: v:7 i:5 c:6
│ ├── immutable
│ ├── fd: (5)-->(6,7)
│ ├── scan comp
│ │ ├── columns: i:5 c:6
│ │ ├── computed column expressions
│ │ │ ├── c:6
│ │ │ │ └── i:5 + 10
│ │ │ └── v:7
│ │ │ └── abs(i:5)
│ │ └── fd: (5)-->(6)
│ └── projections
│ └── abs(i:5) [as=v:7, outer=(5), immutable]
└── filters
├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
└── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)]

# Cases with non-extractable equality.
norm expect-not=ExtractJoinEqualities
SELECT * FROM xy FULL OUTER JOIN uv ON x=u
Expand Down
Loading

0 comments on commit 56b5ce5

Please sign in to comment.