Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: hoist uncorrelated equality subqueries #100881

Merged
merged 1 commit into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3514,6 +3514,10 @@ func (m *sessionDataMutator) SetOptimizerAlwaysUseHistograms(val bool) {
m.data.OptimizerAlwaysUseHistograms = val
}

// SetOptimizerHoistUncorrelatedEqualitySubqueries stores val in the
// OptimizerHoistUncorrelatedEqualitySubqueries field of the mutator's session
// data.
//
// NOTE(review): presumably this backs the
// optimizer_hoist_uncorrelated_equality_subqueries session setting; confirm
// against the session variable registration.
func (m *sessionDataMutator) SetOptimizerHoistUncorrelatedEqualitySubqueries(val bool) {
m.data.OptimizerHoistUncorrelatedEqualitySubqueries = val
}

// SetEnableCreateStatsUsingExtremes stores val in the
// EnableCreateStatsUsingExtremes field of the mutator's session data.
func (m *sessionDataMutator) SetEnableCreateStatsUsingExtremes(val bool) {
m.data.EnableCreateStatsUsingExtremes = val
}
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5278,6 +5278,7 @@ on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer on
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries on
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2759,6 +2759,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL NULL NULL string
opt_split_scan_limit 2048 NULL NULL NULL string
optimizer_always_use_histograms on NULL NULL NULL string
optimizer_hoist_uncorrelated_equality_subqueries on NULL NULL NULL string
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
Expand Down Expand Up @@ -2913,6 +2914,7 @@ null_ordered_last off NULL
on_update_rehome_row_enabled on NULL user NULL on on
opt_split_scan_limit 2048 NULL user NULL 2048 2048
optimizer_always_use_histograms on NULL user NULL on on
optimizer_hoist_uncorrelated_equality_subqueries on NULL user NULL on on
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
Expand Down Expand Up @@ -3067,6 +3069,7 @@ on_update_rehome_row_enabled NULL NULL NULL
opt_split_scan_limit NULL NULL NULL NULL NULL
optimizer NULL NULL NULL NULL NULL
optimizer_always_use_histograms NULL NULL NULL NULL NULL
optimizer_hoist_uncorrelated_equality_subqueries NULL NULL NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ null_ordered_last off
on_update_rehome_row_enabled on
opt_split_scan_limit 2048
optimizer_always_use_histograms on
optimizer_hoist_uncorrelated_equality_subqueries on
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
Expand Down
80 changes: 37 additions & 43 deletions pkg/sql/opt/exec/execbuilder/testdata/subquery
Original file line number Diff line number Diff line change
Expand Up @@ -121,64 +121,58 @@ vectorized: true
• root
│ columns: (a, b, c)
├── • filter
├── • project
│ │ columns: (a, b, c)
│ │ estimated row count: 333 (missing stats)
│ │ filter: a = @S2
│ │
│ └── • scan
│ columns: (a, b, c)
│ estimated row count: 1,000 (missing stats)
│ table: abc@abc_pkey
│ spans: FULL SCAN
├── • subquery
│ │ id: @S1
│ │ original sql: (SELECT * FROM abc WHERE c = (a + 3))
│ │ exec mode: one row
│ │
│ └── • render
│ │ columns: (column16)
│ │ render column16: true
│ └── • lookup join (inner)
│ │ columns: (any_not_null, a, b, c)
│ │ estimated row count: 1 (missing stats)
│ │ table: abc@abc_pkey
│ │ equality: (any_not_null) = (a)
│ │ equality cols are key
│ │
│ └── • limit
│ │ columns: (a, c)
│ │ count: 1
│ └── • group (scalar)
│ │ columns: (any_not_null)
│ │ estimated row count: 1 (missing stats)
│ │ aggregate 0: any_not_null(a)
│ │
│ └── • filter
│ │ columns: (a, c)
│ │ estimated row count: 330 (missing stats)
│ │ filter: c = (a + 3)
│ └── • limit
│ │ columns: (a)
│ │ count: 1
│ │
│ └── • scan
│ columns: (a, c)
│ estimated row count: 1,000 (missing stats)
│ table: abc@abc_pkey
│ spans: FULL SCAN (SOFT LIMIT)
│ └── • filter
│ │ columns: (a)
│ │ ordering: -a
│ │ estimated row count: 333 (missing stats)
│ │ filter: COALESCE(@S1, false)
│ │
│ └── • revscan
│ columns: (a)
│ ordering: -a
│ estimated row count: 1,000 (missing stats)
│ table: abc@abc_pkey
│ spans: FULL SCAN (SOFT LIMIT)
└── • subquery
│ id: @S2
│ original sql: (SELECT max(a) FROM abc WHERE EXISTS (SELECT * FROM abc WHERE c = (a + 3)))
│ id: @S1
│ original sql: (SELECT * FROM abc WHERE c = (a + 3))
│ exec mode: one row
└── • group (scalar)
│ columns: (any_not_null)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(a)
└── • render
│ columns: (column16)
│ render column16: true
└── • limit
│ columns: (a)
│ columns: (a, c)
│ count: 1
└── • filter
│ columns: (a)
│ ordering: -a
│ estimated row count: 333 (missing stats)
│ filter: COALESCE(@S1, false)
│ columns: (a, c)
│ estimated row count: 330 (missing stats)
│ filter: c = (a + 3)
└── • revscan
columns: (a)
ordering: -a
└── • scan
columns: (a, c)
estimated row count: 1,000 (missing stats)
table: abc@abc_pkey
spans: FULL SCAN (SOFT LIMIT)
Expand Down
23 changes: 13 additions & 10 deletions pkg/sql/opt/exec/execbuilder/testdata/tpch_vec
Original file line number Diff line number Diff line change
Expand Up @@ -20848,17 +20848,20 @@ EXPLAIN (VEC) SELECT s_suppkey, s_name, s_address, s_phone, total_revenue FROM s
----
└ Node 1
└ *colexecjoin.mergeJoinInnerOp
├ *colfetcher.ColBatchScan
└ *rowexec.joinReader
└ *colexec.sortOp
└ *colexecsel.selEQFloat64Float64Op
└ *colexecbase.castOpNullAny
└ *colexecbase.constNullOp
└ *colexec.hashAggregator
└ *colexecproj.projMultFloat64Float64Op
└ *colexecprojconst.projMinusFloat64ConstFloat64Op
└ *colfetcher.ColIndexJoin
└ *colfetcher.ColBatchScan
└ *colexecjoin.hashJoiner
├ *colexec.hashAggregator
│ └ *colexecproj.projMultFloat64Float64Op
│ └ *colexecprojconst.projMinusFloat64ConstFloat64Op
│ └ *colfetcher.ColIndexJoin
│ └ *colfetcher.ColBatchScan
└ *colexec.orderedAggregator
└ *colexec.hashAggregator
└ *colexecproj.projMultFloat64Float64Op
└ *colexecprojconst.projMinusFloat64ConstFloat64Op
└ *colfetcher.ColIndexJoin
└ *colfetcher.ColBatchScan

statement ok
DROP VIEW revenue0
Expand Down
60 changes: 30 additions & 30 deletions pkg/sql/opt/exec/execbuilder/testdata/udf
Original file line number Diff line number Diff line change
Expand Up @@ -116,48 +116,48 @@ EXPLAIN (VERBOSE) SELECT * FROM sub3 WHERE sub_fn() = 3 AND (SELECT max(a) FROM
distribution: local
vectorized: true
·
root
project
│ columns: (a)
├── • filter
│ │ columns: (a)
│ │ estimated row count: 111 (missing stats)
│ │ filter: (sub_fn() = 3) AND (a = @S1)
│ │
│ └── • scan
│ columns: (a)
│ estimated row count: 1,000 (missing stats)
│ table: sub3@sub3_pkey
│ spans: FULL SCAN
└── • subquery
│ id: @S1
│ original sql: (SELECT max(a) FROM sub2)
│ exec mode: one row
└── • lookup join (inner)
│ columns: (any_not_null, a)
│ estimated row count: 1 (missing stats)
│ table: sub3@sub3_pkey
│ equality: (any_not_null) = (a)
│ equality cols are key
│ pred: sub_fn() = 3
└── • group (scalar)
└── • filter
│ columns: (any_not_null)
│ estimated row count: 1 (missing stats)
aggregate 0: any_not_null(a)
│ estimated row count: 0 (missing stats)
filter: sub_fn() = 3
└── • revscan
columns: (a)
estimated row count: 1 (missing stats)
table: sub2@sub2_pkey
spans: LIMITED SCAN
limit: 1
└── • group (scalar)
│ columns: (any_not_null)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(a)
└── • revscan
columns: (a)
estimated row count: 1 (missing stats)
table: sub2@sub2_pkey
spans: LIMITED SCAN
limit: 1

statement ok
CREATE FUNCTION sub_fn_lt() RETURNS INT LANGUAGE SQL AS 'SELECT a FROM sub1 WHERE a < (SELECT max(a) FROM sub2)'

# The uncorrelated subquery in the UDF body is executed only once.
query T kvtrace
SELECT sub_fn()
SELECT sub_fn_lt()
----
Scan /Table/112/{1-2}
Scan /Table/113/{1-2}

# The uncorrelated subquery in the UDF body is executed only once per row
# produced by generate_series.
query T kvtrace
SELECT sub_fn() FROM generate_series(1, 3)
SELECT sub_fn_lt() FROM generate_series(1, 3)
----
Scan /Table/112/{1-2}
Scan /Table/113/{1-2}
Expand All @@ -174,12 +174,12 @@ CREATE FUNCTION sub_fn2() RETURNS INT LANGUAGE SQL AS 'SELECT a FROM sub1 WHERE
query T kvtrace
SELECT sub_fn2() FROM generate_series(1, 3)
----
Scan /Table/112/{1-2}
Scan /Table/113/1/30/0
Scan /Table/112/{1-2}
Scan /Table/112/1/30/0
Scan /Table/113/1/30/0
Scan /Table/112/{1-2}
Scan /Table/112/1/30/0
Scan /Table/113/1/30/0
Scan /Table/112/1/30/0

statement ok
CREATE FUNCTION sub_fn3() RETURNS INT LANGUAGE SQL AS 'SELECT a FROM sub1 WHERE EXISTS (SELECT a FROM sub2 WHERE a = 30)'
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ type Memo struct {
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool
alwaysUseHistograms bool
hoistUncorrelatedEqualitySubqueries bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -221,6 +222,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
alwaysUseHistograms: evalCtx.SessionData().OptimizerAlwaysUseHistograms,
hoistUncorrelatedEqualitySubqueries: evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -362,7 +364,8 @@ func (m *Memo) IsStale(
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins ||
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms {
m.alwaysUseHistograms != evalCtx.SessionData().OptimizerAlwaysUseHistograms ||
m.hoistUncorrelatedEqualitySubqueries != evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerAlwaysUseHistograms = false
notStale()

// Stale optimizer_hoist_uncorrelated_equality_subqueries.
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = true
stale()
evalCtx.SessionData().OptimizerHoistUncorrelatedEqualitySubqueries = false
notStale()

// Stale data sources and schema. Create new catalog so that data sources are
// recreated and can be modified independently.
catalog = testcat.New()
Expand Down
Loading