diff --git a/pkg/sql/opt/exec/execbuilder/testdata/expression_index b/pkg/sql/opt/exec/execbuilder/testdata/expression_index index b8ebcb78e715..04faaaefdd45 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/expression_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/expression_index @@ -1,5 +1,13 @@ # LogicTest: local +statement ok +CREATE TABLE mn ( + k INT PRIMARY KEY, + m INT, + n STRING, + FAMILY (k, m, n) +) + statement ok CREATE TABLE t ( k INT PRIMARY KEY, @@ -87,3 +95,58 @@ SELECT * FROM [ missing stats table: t@t_lower_c_a_plus_b_idx spans: [/'foo'/111 - /'foo'] + +# Lookup joins can be planned on expression indexes. +# TODO(mgartner): We must drop these indexes to be able to plan a lookup join. +# This is required because of a limitation of the normalization rule +# ExtractJoinEqualities: it can only choose a single virtual column to project +# from the children of the join. When there are multiple expression indexes with +# the same expression, we create multiple virtual columns with the same +# expression in the table descriptor. If ExtractJoinEqualities picks the wrong +# virtual column to project, GenerateLookupJoinsWithVirtualCols will fail to +# generate a lookup join. This can be solved by making the opt catalog present +# all expression index virtual columns with the same expressions as a single +# virtual column. +statement ok +DROP INDEX t_a_plus_b_idx; +DROP INDEX t_lower_c_idx + +query T +SELECT * FROM [ + EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON n = lower(c) +] OFFSET 2 +---- +· +• lookup join +│ table: t@t_pkey +│ equality: (k) = (k) +│ equality cols are key +│ +└── • lookup join + │ table: t@t_lower_c_a_plus_b_idx + │ equality: (n) = (crdb_internal_idx_expr_2) + │ + └── • scan + missing stats + table: mn@mn_pkey + spans: FULL SCAN + +query T +SELECT * FROM [ + EXPLAIN SELECT * FROM mn INNER LOOKUP JOIN t ON m = a + b AND n = lower(c) +] OFFSET 2 +---- +· +• lookup join +│ table: t@t_pkey +│ equality: (k) = (k) +│ equality cols are key +│ +└── • lookup join + │ table: t@t_lower_c_a_plus_b_idx + │ equality: (n, m) = (crdb_internal_idx_expr_2,crdb_internal_idx_expr_3) + │ + └── • scan + missing stats + table: mn@mn_pkey + spans: FULL SCAN diff --git a/pkg/sql/opt/norm/join_funcs.go b/pkg/sql/opt/norm/join_funcs.go index ef6e21a960f4..9b9d3de3ef79 100644 --- a/pkg/sql/opt/norm/join_funcs.go +++ b/pkg/sql/opt/norm/join_funcs.go @@ -526,8 +526,8 @@ func (c *CustomFuncs) ExtractJoinEquality( } var leftProj, rightProj projectBuilder - leftProj.init(c.f) - rightProj.init(c.f) + leftProj.init(c, leftCols) + rightProj.init(c, rightCols) newFilters := make(memo.FiltersExpr, len(filters)) for i := range filters { @@ -540,19 +540,26 @@ func (c *CustomFuncs) ExtractJoinEquality( c.f.ConstructEq(leftProj.add(a), rightProj.add(b)), ) } - if leftProj.empty() && rightProj.empty() { - panic(errors.AssertionFailedf("no equalities to extract")) - } join := c.f.ConstructJoin( joinOp, - leftProj.buildProject(left, leftCols), - rightProj.buildProject(right, rightCols), + leftProj.buildProject(left), + rightProj.buildProject(right), newFilters, private, ) - // Project away the synthesized columns. + if leftProj.empty() && rightProj.empty() { + // If no new projections were created, then there are no synthesized + // columns to project away, so we can return the join. This is possible + // when projections that are added to left and right are identical to + // computed columns that are already output left and right. There's no + // need to re-project these expressions, so projectBuilder will simply + // pass them through. + return join + } + + // Otherwise, project away the synthesized columns. outputCols := leftCols if joinOp != opt.SemiJoinOp && joinOp != opt.AntiJoinOp { // Semi/Anti join only produce the left side columns. All other join types diff --git a/pkg/sql/opt/norm/project_builder.go b/pkg/sql/opt/norm/project_builder.go index a71c0da791d8..610ee6f156bc 100644 --- a/pkg/sql/opt/norm/project_builder.go +++ b/pkg/sql/opt/norm/project_builder.go @@ -19,22 +19,23 @@ import ( // input with new synthesized and passthrough columns. Sample usage: // // var pb projectBuilder -// pb.init(c) +// pb.init(c, passthrough) // e1 := pb.add(some expression) // e2 := pb.add(some other expression) -// augmentedInput := pb.buildProject(input, passthrough) +// augmentedInput := pb.buildProject(input) // // e1 and e2 are VariableOp expressions, with input columns // // produced by augmentedInput. // type projectBuilder struct { - f *Factory + c *CustomFuncs projections memo.ProjectionsExpr + passthrough opt.ColSet } -func (pb *projectBuilder) init(f *Factory) { +func (pb *projectBuilder) init(c *CustomFuncs, passthrough opt.ColSet) { // This initialization pattern ensures that fields are not unwittingly // reused. Field reuse must be explicit. - *pb = projectBuilder{f: f} + *pb = projectBuilder{c: c, passthrough: passthrough} } // empty returns true if there are no synthesized columns (and hence a @@ -43,26 +44,62 @@ func (pb *projectBuilder) empty() bool { return len(pb.projections) == 0 } -// add incorporates the given expression as a projection, unless the expression -// is already a "bare" variable. Returns a bare variable expression referring to -// the synthesized column. +// add attempts to incorporate the given expression as a projection. If the +// expression is already a "bare" variable, no projection is created and the +// variable is returned. If the expression matches a computed column expression +// in a base table, then the computed column is used as the projection column +// and is returned. If this computed column already exists as a passthrough +// column, then no projection is added. If the expression is not a "bare" +// variable and does not match a computed column expression, a new column is +// synthesized, a projection is created, and the synthesized column is returned. func (pb *projectBuilder) add(e opt.ScalarExpr) opt.ScalarExpr { if v, ok := e.(*memo.VariableExpr); ok { // The expression is a bare variable; we don't need to synthesize a column. return v } - newCol := pb.f.Metadata().AddColumn("", e.DataType()) - pb.projections = append(pb.projections, pb.f.ConstructProjectionsItem(e, newCol)) - return pb.f.ConstructVariable(newCol) + // Look for a computed column in a base table with an identical expression. + // If one exists, we can use it as the projection column instead of + // synthesizing a new column. + var projectCol opt.ColumnID + if cols := pb.c.OuterCols(e); !cols.Empty() { + // Get the base table of the first column referenced in e. + col, _ := cols.Next(0) + if tabID := pb.c.f.Metadata().ColumnMeta(col).Table; tabID != 0 { + // If the column has a base table (i.e., it is not a synthesized + // column), search for a computed column expression in the base + // table identical to e. + for compCol, expr := range pb.c.f.Metadata().TableMeta(tabID).ComputedCols { + if e == expr { + projectCol = compCol + break + } + } + } + } + + // If the input to the Project already contains the computed column, then + // there is no need to re-project its expression. + if pb.passthrough.Contains(projectCol) { + return pb.c.f.ConstructVariable(projectCol) + } + + // If we did not find an existing computed column with the same expression, + // synthesize a new column. + if projectCol == 0 { + projectCol = pb.c.f.Metadata().AddColumn("", e.DataType()) + } + + pb.projections = append(pb.projections, pb.c.f.ConstructProjectionsItem(e, projectCol)) + return pb.c.f.ConstructVariable(projectCol) } // buildProject creates the ProjectOp (if needed). The ProjectOp passes through // the given passthrough columns and adds any synthesized columns. -func (pb *projectBuilder) buildProject(input memo.RelExpr, passthrough opt.ColSet) memo.RelExpr { +func (pb *projectBuilder) buildProject(input memo.RelExpr) memo.RelExpr { if pb.empty() { // Avoid creating a Project that does nothing and just gets elided. return input } - return pb.f.ConstructProject(input, pb.projections, passthrough) + return pb.c.f.ConstructProject(input, pb.projections, pb.passthrough) } diff --git a/pkg/sql/opt/norm/testdata/rules/join b/pkg/sql/opt/norm/testdata/rules/join index a20d047cc755..22a940b3d7e3 100644 --- a/pkg/sql/opt/norm/testdata/rules/join +++ b/pkg/sql/opt/norm/testdata/rules/join @@ -26,7 +26,11 @@ CREATE TABLE uv (u INT PRIMARY KEY, v INT) ---- exec-ddl -CREATE TABLE booleans(a BOOL, b BOOL, c BOOL, d BOOL, e BOOL) +CREATE TABLE booleans (a BOOL, b BOOL, c BOOL, d BOOL, e BOOL) +---- + +exec-ddl +CREATE TABLE comp (i INT, c INT AS (i + 10) STORED, v INT AS (abs(i)) VIRTUAL) ---- norm @@ -3199,6 +3203,70 @@ project ├── v:6 = (x:1 + u:5) [outer=(1,5,6), immutable, constraints=(/6: (/NULL - ]), fd=(1,5)-->(6)] └── column9:9 = u:5 [outer=(5,9), constraints=(/5: (/NULL - ]; /9: (/NULL - ]), fd=(5)==(9), (9)==(5)] +# Computed columns with matching expressions should be reused as projection +# columns. +norm expect=ExtractJoinEqualities +SELECT * FROM xy JOIN comp ON x=i+10 AND y=abs(i) +---- +inner-join (hash) + ├── columns: x:1!null y:2!null i:5 c:6!null v:7!null + ├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one) + ├── immutable + ├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2) + ├── scan xy + │ ├── columns: x:1!null y:2 + │ ├── key: (1) + │ └── fd: (1)-->(2) + ├── project + │ ├── columns: v:7 i:5 c:6 + │ ├── immutable + │ ├── fd: (5)-->(6,7) + │ ├── scan comp + │ │ ├── columns: i:5 c:6 + │ │ ├── computed column expressions + │ │ │ ├── c:6 + │ │ │ │ └── i:5 + 10 + │ │ │ └── v:7 + │ │ │ └── abs(i:5) + │ │ └── fd: (5)-->(6) + │ └── projections + │ └── abs(i:5) [as=v:7, outer=(5), immutable] + └── filters + ├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + └── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + +# Computed columns with matching expressions should be reused as projection +# columns. +norm expect=ExtractJoinEqualities +SELECT * FROM xy JOIN comp ON i+10=x AND abs(i)=y +---- +inner-join (hash) + ├── columns: x:1!null y:2!null i:5 c:6!null v:7!null + ├── multiplicity: left-rows(zero-or-more), right-rows(zero-or-one) + ├── immutable + ├── fd: (1)-->(2), (5)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2) + ├── scan xy + │ ├── columns: x:1!null y:2 + │ ├── key: (1) + │ └── fd: (1)-->(2) + ├── project + │ ├── columns: v:7 i:5 c:6 + │ ├── immutable + │ ├── fd: (5)-->(6,7) + │ ├── scan comp + │ │ ├── columns: i:5 c:6 + │ │ ├── computed column expressions + │ │ │ ├── c:6 + │ │ │ │ └── i:5 + 10 + │ │ │ └── v:7 + │ │ │ └── abs(i:5) + │ │ └── fd: (5)-->(6) + │ └── projections + │ └── abs(i:5) [as=v:7, outer=(5), immutable] + └── filters + ├── x:1 = c:6 [outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)] + └── y:2 = v:7 [outer=(2,7), constraints=(/2: (/NULL - ]; /7: (/NULL - ]), fd=(2)==(7), (7)==(2)] + # Cases with non-extractable equality. norm expect-not=ExtractJoinEqualities SELECT * FROM xy FULL OUTER JOIN uv ON x=u diff --git a/pkg/sql/opt/xform/testdata/rules/join b/pkg/sql/opt/xform/testdata/rules/join index 85d1b6c6e5f2..900333b9313e 100644 --- a/pkg/sql/opt/xform/testdata/rules/join +++ b/pkg/sql/opt/xform/testdata/rules/join @@ -4140,6 +4140,24 @@ project │ └── columns: m:1 └── filters (true) +# Covering case. Join on virtual column expression but do not produce it. +opt expect=GenerateLookupJoinsWithVirtualCols +SELECT m, virt.k FROM small INNER LOOKUP JOIN virt ON m = virt.i + 10 +---- +project + ├── columns: m:1!null k:6!null + ├── immutable + ├── fd: (6)-->(1) + └── inner-join (lookup virt@v1) + ├── columns: m:1!null k:6!null v1:9!null + ├── flags: force lookup join (into right side) + ├── key columns: [1] = [9] + ├── immutable + ├── fd: (6)-->(9), (1)==(9), (9)==(1) + ├── scan small + │ └── columns: m:1 + └── filters (true) + # Covering case. Produce virtual column. opt expect=GenerateLookupJoinsWithVirtualCols SELECT m, virt.k, virt.v1 FROM small INNER LOOKUP JOIN virt ON m = virt.v1 @@ -4154,6 +4172,20 @@ inner-join (lookup virt@v1) │ └── columns: m:1 └── filters (true) +# Covering case. Join on virtual column expression and produce it. +opt expect=GenerateLookupJoinsWithVirtualCols +SELECT m, virt.k, virt.v1 FROM small INNER LOOKUP JOIN virt ON m = virt.i + 10 +---- +inner-join (lookup virt@v1) + ├── columns: m:1!null k:6!null v1:9!null + ├── flags: force lookup join (into right side) + ├── key columns: [1] = [9] + ├── immutable + ├── fd: (6)-->(9), (1)==(9), (9)==(1) + ├── scan small + │ └── columns: m:1 + └── filters (true) + # Non-covering. opt expect=GenerateLookupJoinsWithVirtualCols SELECT m, virt.i, virt.v1 FROM small INNER LOOKUP JOIN virt ON m = virt.v1 @@ -4174,6 +4206,26 @@ inner-join (lookup virt) │ └── filters (true) └── filters (true) +# Non-covering. Join on virtual column expression. +opt expect=GenerateLookupJoinsWithVirtualCols +SELECT m, virt.i, virt.v1 FROM small INNER LOOKUP JOIN virt ON m = virt.i + 10 +---- +inner-join (lookup virt) + ├── columns: m:1!null i:7 v1:9!null + ├── key columns: [6] = [6] + ├── lookup columns are key + ├── immutable + ├── fd: (7)-->(9), (1)==(9), (9)==(1) + ├── inner-join (lookup virt@v1) + │ ├── columns: m:1!null k:6!null v1:9!null + │ ├── flags: force lookup join (into right side) + │ ├── key columns: [1] = [9] + │ ├── fd: (6)-->(9), (1)==(9), (9)==(1) + │ ├── scan small + │ │ └── columns: m:1 + │ └── filters (true) + └── filters (true) + # Do not generate a lookup join when the right side projects a column that is # not a virtual computed column. opt expect-not=GenerateLookupJoinsWithVirtualCols @@ -4288,7 +4340,7 @@ semi-join (hash) │ └── projections │ └── i + 10 └── filters - └── m = column14 + └── m = v1 # Do not generate lookup joins with virtual columns for anti-joins. opt expect-not=GenerateLookupJoinsWithVirtualCols format=hide-all @@ -4310,7 +4362,7 @@ anti-join (hash) │ └── projections │ └── i + 10 └── filters - └── m = column14 + └── m = v1 exec-ddl DROP INDEX v1 @@ -4521,6 +4573,26 @@ project └── filters └── i:7 > 0 [outer=(7), constraints=(/7: [/1 - ]; tight)] +# Covering case. Join on virtual column expression with an extra filter on the +# non-virtual column. +opt expect=GenerateLookupJoinsWithVirtualColsAndFilter +SELECT m, virt.i FROM small INNER LOOKUP JOIN virt ON m = virt.i + 10 AND i > 0 +---- +project + ├── columns: m:1!null i:7!null + ├── immutable + ├── fd: (7)-->(1) + └── inner-join (lookup virt@v1_storing_i) + ├── columns: m:1!null i:7!null v1:9!null + ├── flags: force lookup join (into right side) + ├── key columns: [1] = [9] + ├── immutable + ├── fd: (7)-->(9), (1)==(9), (9)==(1) + ├── scan small + │ └── columns: m:1 + └── filters + └── i:7 > 0 [outer=(7), constraints=(/7: [/1 - ]; tight)] + # Covering case. Virtual column is the lookup column and there is an extra # filter on the non-virtual column, but the column is not selected. We do not # handle this case yet. @@ -4595,6 +4667,31 @@ project └── filters └── j:8 > 0 [outer=(8), constraints=(/8: [/1 - ]; tight)] +# Non-covering case. Join on virtual column expression with an extra filter on the +# non-virtual column. +opt expect=GenerateLookupJoinsWithVirtualColsAndFilter +SELECT m, virt.j FROM small INNER LOOKUP JOIN virt ON m = virt.i + 10 AND j > 0 +---- +project + ├── columns: m:1!null j:8!null + ├── immutable + └── inner-join (lookup virt) + ├── columns: m:1!null j:8!null v1:9!null + ├── key columns: [6] = [6] + ├── lookup columns are key + ├── immutable + ├── fd: (1)==(9), (9)==(1) + ├── inner-join (lookup virt@v1_storing_i) + │ ├── columns: m:1!null k:6!null v1:9!null + │ ├── flags: force lookup join (into right side) + │ ├── key columns: [1] = [9] + │ ├── fd: (6)-->(9), (1)==(9), (9)==(1) + │ ├── scan small + │ │ └── columns: m:1 + │ └── filters (true) + └── filters + └── j:8 > 0 [outer=(8), constraints=(/8: [/1 - ]; tight)] + # Non-covering case. Virtual column is the lookup column and there is an extra # filter on a column not in the index, but the column is not selected. We do not # handle this case yet.