Skip to content

Commit

Permalink
opt: allow join elimination rules to remap columns
Browse files Browse the repository at this point in the history
When it can be proven that a join does not add rows to or remove them
from one of its inputs, the other input can often be removed, eliminating
the join. However, this can only be done if the columns from the
eliminated side are not needed.

This patch allows the join elimination rules to remap columns from the
eliminated side to the preserved side of the join, using the join's
functional dependencies. For example:
```
CREATE TABLE xy (x INT PRIMARY KEY, y INT);
CREATE TABLE fk (k INT PRIMARY KEY, v INT NOT NULL, FOREIGN KEY (v) REFERENCES xy (x));

SELECT x, k, v FROM fk INNER JOIN xy ON v = x;
```
In the example above, the join could not previously be eliminated because
the `x` column is required in the output. Now, the `x` column is remapped
to the equivalent `v` column, allowing the join to be removed.

Fixes cockroachdb#102614

Release note (performance improvement): The optimizer can now eliminate
joins in more cases.
  • Loading branch information
DrewKimball committed Jun 20, 2023
1 parent 7cbcd3f commit 040b95a
Show file tree
Hide file tree
Showing 17 changed files with 328 additions and 223 deletions.
22 changes: 4 additions & 18 deletions pkg/sql/opt/exec/execbuilder/testdata/join
Original file line number Diff line number Diff line change
Expand Up @@ -543,18 +543,11 @@ EXPLAIN SELECT * FROM cards LEFT OUTER JOIN customers ON customers.id = cards.cu
distribution: local
vectorized: true
·
• merge join
│ equality: (cust) = (id)
│ right cols are key
├── • scan
│ missing stats
│ table: cards@cards_cust_idx
│ spans: FULL SCAN
• render
└── • scan
missing stats
table: customers@customers_pkey
table: cards@cards_pkey
spans: FULL SCAN

# Tests for filter propagation through joins.
Expand Down Expand Up @@ -2220,16 +2213,9 @@ EXPLAIN SELECT * FROM cards LEFT OUTER HASH JOIN customers ON customers.id = car
distribution: local
vectorized: true
·
• hash join
│ equality: (cust) = (id)
│ right cols are key
├── • scan
│ missing stats
│ table: cards@cards_pkey
│ spans: FULL SCAN
• render
└── • scan
missing stats
table: customers@customers_pkey
table: cards@cards_pkey
spans: FULL SCAN
39 changes: 15 additions & 24 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -184,33 +184,24 @@ EXPLAIN (VERBOSE) SELECT a FROM abc WHERE a IN (SELECT a FROM abc WHERE b < 0)
distribution: local
vectorized: true
·
merge join (semi)
project
│ columns: (a)
│ estimated row count: 333 (missing stats)
│ equality: (a) = (a)
│ left cols are key
│ right cols are key
│ merge ordering: +"(a=a)"
├── • scan
│ columns: (a)
│ ordering: +a
│ estimated row count: 1,000 (missing stats)
│ table: abc@abc_pkey
│ spans: FULL SCAN
└── • filter
│ columns: (a, b)
│ ordering: +a
│ estimated row count: 333 (missing stats)
│ filter: b < 0
└── • render
│ columns: (a, a)
│ render a: a
│ render a: a
└── • scan
columns: (a, b)
ordering: +a
estimated row count: 1,000 (missing stats)
table: abc@abc_pkey
spans: FULL SCAN
└── • filter
│ columns: (a, b)
│ estimated row count: 333 (missing stats)
│ filter: b < 0
└── • scan
columns: (a, b)
estimated row count: 1,000 (missing stats)
table: abc@abc_pkey
spans: FULL SCAN

query T
EXPLAIN SELECT * FROM (SELECT * FROM (VALUES (1, 8, 8), (3, 1, 1), (2, 4, 4)) AS moo (moo1, moo2, moo3) ORDER BY moo2) as foo (foo1) ORDER BY foo1
Expand Down
9 changes: 4 additions & 5 deletions pkg/sql/opt/exec/execbuilder/testdata/upsert
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,11 @@ vectorized: true
│ auto commit
│ arbiter indexes: kv_pkey
└── • lookup join (inner)
└── • render
│ columns: (k, v_default, k)
│ estimated row count: 2 (missing stats)
│ table: kv@kv_pkey
│ equality: (k) = (k)
│ equality cols are key
│ render k: k
│ render k: k
│ render v_default: v_default
└── • distinct
│ columns: (v_default, k)
Expand Down
92 changes: 31 additions & 61 deletions pkg/sql/opt/memo/testdata/logprops/join
Original file line number Diff line number Diff line change
Expand Up @@ -2427,41 +2427,33 @@ INNER JOIN (SELECT xysd.x, a.x AS t FROM xysd INNER JOIN xysd AS a ON xysd.x = a
----
inner-join (hash)
├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int) x:7(int!null) t:13(int!null)
├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more)
├── key: (1)
├── fd: (1)-->(2-4), (7)==(3,13), (13)==(3,7), (3)==(7,13)
├── prune: (1,2,4)
├── interesting orderings: (+1) (+7) (+13)
├── prune: (1,2,4,7)
├── interesting orderings: (+1) (+(7|13))
├── scan fk
│ ├── columns: k:1(int!null) v:2(int) r1:3(int!null) r2:4(int)
│ ├── key: (1)
│ ├── fd: (1)-->(2-4)
│ ├── prune: (1-4)
│ ├── interesting orderings: (+1)
│ └── unfiltered-cols: (1-6)
├── inner-join (hash)
│ ├── columns: xysd.x:7(int!null) a.x:13(int!null)
│ ├── multiplicity: left-rows(exactly-one), right-rows(exactly-one)
│ ├── key: (13)
├── project
│ ├── columns: a.x:13(int!null) xysd.x:7(int!null)
│ ├── key: (7)
│ ├── fd: (7)==(13), (13)==(7)
│ ├── interesting orderings: (+7) (+13)
│ ├── unfiltered-cols: (7-18)
│ ├── prune: (7,13)
│ ├── interesting orderings: (+(7|13))
│ ├── unfiltered-cols: (7-12)
│ ├── scan xysd
│ │ ├── columns: xysd.x:7(int!null)
│ │ ├── key: (7)
│ │ ├── prune: (7)
│ │ ├── interesting orderings: (+7)
│ │ └── unfiltered-cols: (7-12)
│ ├── scan xysd [as=a]
│ │ ├── columns: a.x:13(int!null)
│ │ ├── key: (13)
│ │ ├── prune: (13)
│ │ ├── interesting orderings: (+13)
│ │ └── unfiltered-cols: (13-18)
│ └── filters
│ └── eq [type=bool, outer=(7,13), constraints=(/7: (/NULL - ]; /13: (/NULL - ]), fd=(7)==(13), (13)==(7)]
│ ├── variable: xysd.x:7 [type=int]
│ └── variable: a.x:13 [type=int]
│ └── projections
│ └── variable: xysd.x:7 [as=a.x:13, type=int, outer=(7)]
└── filters
└── eq [type=bool, outer=(3,13), constraints=(/3: (/NULL - ]; /13: (/NULL - ]), fd=(3)==(13), (13)==(3)]
├── variable: r1:3 [type=int]
Expand Down Expand Up @@ -2683,11 +2675,10 @@ FROM (SELECT r1, r2, r3 FROM ref WHERE r2 IS NOT NULL)
INNER JOIN abc
ON (r1, r2, r3) = (a, b, c)
----
inner-join (hash)
project
├── columns: r1:1(int!null) r2:2(int!null) r3:3(int!null) a:7(int!null) b:8(int!null) c:9(int!null)
├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
├── fd: (1)==(7), (7)==(1), (2)==(8), (8)==(2), (3)==(9), (9)==(3)
├── interesting orderings: (+7,+8,+9)
├── prune: (1-3,7-9)
├── select
│ ├── columns: r1:1(int!null) r2:2(int!null) r3:3(int!null)
│ ├── prune: (1,3)
Expand All @@ -2698,22 +2689,10 @@ inner-join (hash)
│ └── is-not [type=bool, outer=(2), constraints=(/2: (/NULL - ]; tight)]
│ ├── variable: r2:2 [type=int]
│ └── null [type=unknown]
├── scan abc
│ ├── columns: a:7(int!null) b:8(int!null) c:9(int!null)
│ ├── key: (7-9)
│ ├── prune: (7-9)
│ ├── interesting orderings: (+7,+8,+9)
│ └── unfiltered-cols: (7-11)
└── filters
├── eq [type=bool, outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]
│ ├── variable: r1:1 [type=int]
│ └── variable: a:7 [type=int]
├── eq [type=bool, outer=(2,8), constraints=(/2: (/NULL - ]; /8: (/NULL - ]), fd=(2)==(8), (8)==(2)]
│ ├── variable: r2:2 [type=int]
│ └── variable: b:8 [type=int]
└── eq [type=bool, outer=(3,9), constraints=(/3: (/NULL - ]; /9: (/NULL - ]), fd=(3)==(9), (9)==(3)]
├── variable: r3:3 [type=int]
└── variable: c:9 [type=int]
└── projections
├── variable: r1:1 [as=a:7, type=int, outer=(1)]
├── variable: r2:2 [as=b:8, type=int, outer=(2)]
└── variable: r3:3 [as=c:9, type=int, outer=(3)]

# Case with a not-null multi-column foreign key and an equality on only one of
# the foreign key columns.
Expand Down Expand Up @@ -2796,15 +2775,15 @@ limit
├── columns: t_st_id:1(int!null) t_tt_id:2(int!null) t_s_symb:3(int!null) st_id:7(int!null) tt_id:10(int!null) s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null) ex_id:18(int!null)
├── cardinality: [0 - 50]
├── fd: (13)-->(14,15), (15)==(18), (18)==(15), (1)==(7), (7)==(1), (2)==(10), (10)==(2), (3)==(13), (13)==(3)
├── prune: (14)
├── interesting orderings: (+7) (+10) (+13) (+18)
├── prune: (14,15,18)
├── interesting orderings: (+7) (+10) (+13)
├── inner-join (hash)
│ ├── columns: t_st_id:1(int!null) t_tt_id:2(int!null) t_s_symb:3(int!null) st_id:7(int!null) tt_id:10(int!null) s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null) ex_id:18(int!null)
│ ├── multiplicity: left-rows(zero-or-one), right-rows(zero-or-more)
│ ├── fd: (13)-->(14,15), (15)==(18), (18)==(15), (1)==(7), (7)==(1), (2)==(10), (10)==(2), (3)==(13), (13)==(3)
│ ├── limit hint: 50.00
│ ├── prune: (14)
│ ├── interesting orderings: (+7) (+10) (+13) (+18)
│ ├── prune: (14,15,18)
│ ├── interesting orderings: (+7) (+10) (+13)
│ ├── scan trade
│ │ ├── columns: t_st_id:1(int!null) t_tt_id:2(int!null) t_s_symb:3(int!null)
│ │ ├── prune: (1-3)
Expand All @@ -2814,8 +2793,8 @@ limit
│ │ ├── multiplicity: left-rows(zero-or-more), right-rows(one-or-more)
│ │ ├── key: (7,10,13)
│ │ ├── fd: (13)-->(14,15), (15)==(18), (18)==(15)
│ │ ├── prune: (7,10,13,14)
│ │ ├── interesting orderings: (+7) (+10) (+13) (+18)
│ │ ├── prune: (7,10,13-15,18)
│ │ ├── interesting orderings: (+7) (+10) (+13)
│ │ ├── scan status_type
│ │ │ ├── columns: st_id:7(int!null)
│ │ │ ├── key: (7)
Expand All @@ -2826,21 +2805,20 @@ limit
│ │ │ ├── columns: tt_id:10(int!null) s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null) ex_id:18(int!null)
│ │ │ ├── key: (10,13)
│ │ │ ├── fd: (13)-->(14,15), (15)==(18), (18)==(15)
│ │ │ ├── prune: (10,13,14)
│ │ │ ├── interesting orderings: (+10) (+13) (+18)
│ │ │ ├── prune: (10,13-15,18)
│ │ │ ├── interesting orderings: (+10) (+13)
│ │ │ ├── scan trade_type
│ │ │ │ ├── columns: tt_id:10(int!null)
│ │ │ │ ├── key: (10)
│ │ │ │ ├── prune: (10)
│ │ │ │ ├── interesting orderings: (+10)
│ │ │ │ └── unfiltered-cols: (10-12)
│ │ │ ├── inner-join (hash)
│ │ │ │ ├── columns: s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null) ex_id:18(int!null)
│ │ │ │ ├── multiplicity: left-rows(exactly-one), right-rows(zero-or-more)
│ │ │ ├── project
│ │ │ │ ├── columns: ex_id:18(int!null) s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null)
│ │ │ │ ├── key: (13)
│ │ │ │ ├── fd: (13)-->(14,15), (15)==(18), (18)==(15)
│ │ │ │ ├── prune: (13,14)
│ │ │ │ ├── interesting orderings: (+13) (+18)
│ │ │ │ ├── prune: (13-15,18)
│ │ │ │ ├── interesting orderings: (+13)
│ │ │ │ ├── unfiltered-cols: (13-17)
│ │ │ │ ├── scan security
│ │ │ │ │ ├── columns: s_symb:13(int!null) s_st_id:14(int!null) s_ex_id:15(int!null)
Expand All @@ -2849,16 +2827,8 @@ limit
│ │ │ │ │ ├── prune: (13-15)
│ │ │ │ │ ├── interesting orderings: (+13)
│ │ │ │ │ └── unfiltered-cols: (13-17)
│ │ │ │ ├── scan exchange
│ │ │ │ │ ├── columns: ex_id:18(int!null)
│ │ │ │ │ ├── key: (18)
│ │ │ │ │ ├── prune: (18)
│ │ │ │ │ ├── interesting orderings: (+18)
│ │ │ │ │ └── unfiltered-cols: (18-20)
│ │ │ │ └── filters
│ │ │ │ └── eq [type=bool, outer=(15,18), constraints=(/15: (/NULL - ]; /18: (/NULL - ]), fd=(15)==(18), (18)==(15)]
│ │ │ │ ├── variable: ex_id:18 [type=int]
│ │ │ │ └── variable: s_ex_id:15 [type=int]
│ │ │ │ └── projections
│ │ │ │ └── variable: s_ex_id:15 [as=ex_id:18, type=int, outer=(15)]
│ │ │ └── filters (true)
│ │ └── filters (true)
│ └── filters
Expand Down
17 changes: 17 additions & 0 deletions pkg/sql/opt/norm/general_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,18 @@ func (c *CustomFuncs) MakeBoolCol() opt.ColumnID {
return c.mem.Metadata().AddColumn("", types.Bool)
}

// CanRemapCols reports whether each column in the "from" set has at least one
// equivalent column in the "to" set according to the given FDs. It returns
// false as soon as a column without such an equivalent is found.
func (c *CustomFuncs) CanRemapCols(from, to opt.ColSet, fds *props.FuncDepSet) bool {
	fromCol, found := from.Next(0)
	for found {
		equivGroup := fds.ComputeEquivGroup(fromCol)
		if !equivGroup.Intersects(to) {
			// No member of this column's equivalence group belongs to the "to"
			// set, so the column cannot be remapped.
			return false
		}
		fromCol, found = from.Next(fromCol + 1)
	}
	return true
}

// ----------------------------------------------------------------------
//
// Outer column functions
Expand Down Expand Up @@ -567,6 +579,11 @@ func (c *CustomFuncs) sharedProps(e opt.Expr) *props.Shared {
}
}

// FuncDeps returns a pointer to the functional dependency set stored in the
// relational properties of the given expression. The set is not copied.
func (c *CustomFuncs) FuncDeps(expr memo.RelExpr) *props.FuncDepSet {
	rel := expr.Relational()
	return &rel.FuncDeps
}

// ----------------------------------------------------------------------
//
// Ordering functions
Expand Down
51 changes: 51 additions & 0 deletions pkg/sql/opt/norm/project_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/sql/opt"
"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/intsets"
Expand Down Expand Up @@ -849,3 +850,53 @@ func (c *CustomFuncs) IsStaticTuple(expr opt.ScalarExpr) bool {
}
return false
}

// ProjectRemappedCols builds one projection item per column of the "from" set
// that is missing from the "to" set. Each item projects an equivalent column
// from the "to" set (chosen via the given FDs) under the ID of the original
// "from" column. Columns already present in the "to" set produce no item.
// ProjectRemappedCols panics with an assertion failure if some column has no
// equivalent in the "to" set.
func (c *CustomFuncs) ProjectRemappedCols(
	from, to opt.ColSet, fds *props.FuncDepSet,
) (projections memo.ProjectionsExpr) {
	fromCol, found := from.Next(0)
	for ; found; fromCol, found = from.Next(fromCol + 1) {
		if to.Contains(fromCol) {
			// Already available from the "to" set; nothing to project.
			continue
		}
		// Restrict the column's equivalence group to columns in the "to" set.
		equivCols := fds.ComputeEquivGroup(fromCol)
		equivCols.IntersectionWith(to)
		if equivCols.Empty() {
			panic(errors.AssertionFailedf("cannot remap column %v", fromCol))
		}
		// Use the first remaining candidate as the projection's source.
		target, _ := equivCols.Next(0)
		item := c.f.ConstructProjectionsItem(c.f.ConstructVariable(target), fromCol)
		projections = append(projections, item)
	}
	return projections
}

// RemapProjectionCols rewrites the given projections so that every variable
// reference to a column outside the "to" set is replaced by a reference to an
// equivalent column inside the "to" set, as determined by the given FDs. It
// returns the rewritten projections.
//
// RemapProjectionCols panics with an assertion failure if a referenced column
// outside the "to" set has no equivalent column in the "to" set.
//
// NOTE(review): every variable not contained in "to" is remapped, not only
// those produced by the eliminated join input. Presumably callers guarantee
// (e.g. via CanRemapCols on the projections' outer columns) that all such
// references can be remapped - confirm against the calling rules.
func (c *CustomFuncs) RemapProjectionCols(
	projections memo.ProjectionsExpr, to opt.ColSet, fds *props.FuncDepSet,
) memo.ProjectionsExpr {
	// getReplacement picks the first column of the "to" set that is equivalent
	// to col.
	getReplacement := func(col opt.ColumnID) opt.ColumnID {
		candidates := fds.ComputeEquivGroup(col)
		candidates.IntersectionWith(to)
		if candidates.Empty() {
			// Include the offending column so the assertion failure is
			// actionable, matching ProjectRemappedCols.
			panic(errors.AssertionFailedf("cannot remap column %v", col))
		}
		replacement, _ := candidates.Next(0)
		return replacement
	}

	// Recursively replace any variable that refers to a column outside the
	// "to" set; all other expressions are rebuilt from their replaced children.
	var replace ReplaceFunc
	replace = func(e opt.Expr) opt.Expr {
		if v, ok := e.(*memo.VariableExpr); ok && !to.Contains(v.Col) {
			// This variable needs to be remapped.
			return c.f.ConstructVariable(getReplacement(v.Col))
		}
		return c.f.Replace(e, replace)
	}
	return *(replace(&projections).(*memo.ProjectionsExpr))
}
Loading

0 comments on commit 040b95a

Please sign in to comment.