Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: split disjunction in join conditions in more cases #97696

Merged
merged 2 commits into from
Feb 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/sql/exec_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3412,6 +3412,10 @@ func (m *sessionDataMutator) SetOptimizerUseLimitOrderingForStreamingGroupBy(val
m.data.OptimizerUseLimitOrderingForStreamingGroupBy = val
}

// SetOptimizerUseImprovedSplitDisjunctionForJoins stores val in the
// OptimizerUseImprovedSplitDisjunctionForJoins field of the mutator's
// session data.
// NOTE(review): presumably backs the
// optimizer_use_improved_split_disjunction_for_joins session variable —
// confirm against the session variable registration.
func (m *sessionDataMutator) SetOptimizerUseImprovedSplitDisjunctionForJoins(val bool) {
m.data.OptimizerUseImprovedSplitDisjunctionForJoins = val
}

// Utility functions related to scrubbing sensitive information on SQL Stats.

// quantizeCounts ensures that the Count field in the
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -5003,6 +5003,7 @@ optimizer on
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2642,6 +2642,7 @@ opt_split_scan_limit 2048 NULL
optimizer_use_forecasts on NULL NULL NULL string
optimizer_use_histograms on NULL NULL NULL string
optimizer_use_improved_disjunction_stats on NULL NULL NULL string
optimizer_use_improved_split_disjunction_for_joins on NULL NULL NULL string
optimizer_use_limit_ordering_for_streaming_group_by on NULL NULL NULL string
optimizer_use_multicol_stats on NULL NULL NULL string
optimizer_use_not_visible_indexes off NULL NULL NULL string
Expand Down Expand Up @@ -2786,6 +2787,7 @@ opt_split_scan_limit 2048 NULL
optimizer_use_forecasts on NULL user NULL on on
optimizer_use_histograms on NULL user NULL on on
optimizer_use_improved_disjunction_stats on NULL user NULL on on
optimizer_use_improved_split_disjunction_for_joins on NULL user NULL on on
optimizer_use_limit_ordering_for_streaming_group_by on NULL user NULL on on
optimizer_use_multicol_stats on NULL user NULL on on
optimizer_use_not_visible_indexes off NULL user NULL off off
Expand Down Expand Up @@ -2929,6 +2931,7 @@ optimizer NULL NULL NULL
optimizer_use_forecasts NULL NULL NULL NULL NULL
optimizer_use_histograms NULL NULL NULL NULL NULL
optimizer_use_improved_disjunction_stats NULL NULL NULL NULL NULL
optimizer_use_improved_split_disjunction_for_joins NULL NULL NULL NULL NULL
optimizer_use_limit_ordering_for_streaming_group_by NULL NULL NULL NULL NULL
optimizer_use_multicol_stats NULL NULL NULL NULL NULL
optimizer_use_not_visible_indexes NULL NULL NULL NULL NULL
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/logictest/testdata/logic_test/show_source
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ opt_split_scan_limit 2048
optimizer_use_forecasts on
optimizer_use_histograms on
optimizer_use_improved_disjunction_stats on
optimizer_use_improved_split_disjunction_for_joins on
optimizer_use_limit_ordering_for_streaming_group_by on
optimizer_use_multicol_stats on
optimizer_use_not_visible_indexes off
Expand Down
67 changes: 25 additions & 42 deletions pkg/sql/opt/exec/execbuilder/testdata/tpch_vec
Original file line number Diff line number Diff line change
Expand Up @@ -20925,48 +20925,31 @@ EXPLAIN (VEC) SELECT sum(l_extendedprice* (1 - l_discount)) AS revenue FROM line
└ *colexec.orderedAggregator
└ *colexecproj.projMultFloat64Float64Op
└ *colexecprojconst.projMinusFloat64ConstFloat64Op
└ *colexec.caseOp
├ *colexec.bufferOp
│ └ *colexecjoin.hashJoiner
│ ├ *colexecsel.selEQBytesBytesConstOp
│ │ └ *colexec.selectInOpBytes
│ │ └ *colfetcher.ColBatchScan
│ └ *colexecsel.selGEInt64Int64ConstOp
│ └ *colfetcher.ColBatchScan
├ *colexecbase.constBoolOp
│ └ *colexec.orProjOp
│ ├ *colexec.bufferOp
│ ├ *colexec.andProjOp
│ │ ├ *colexec.andProjOp
│ │ │ ├ *colexec.andProjOp
│ │ │ │ ├ *colexec.andProjOp
│ │ │ │ │ ├ *colexecprojconst.projEQBytesBytesConstOp
│ │ │ │ │ └ *colexec.projectInOpBytes
│ │ │ │ └ *colexecprojconst.projGEFloat64Float64ConstOp
│ │ │ └ *colexecprojconst.projLEFloat64Float64ConstOp
│ │ └ *colexecprojconst.projLEInt64Int64ConstOp
│ └ *colexec.andProjOp
│ ├ *colexec.andProjOp
│ │ ├ *colexec.andProjOp
│ │ │ ├ *colexec.andProjOp
│ │ │ │ ├ *colexecprojconst.projEQBytesBytesConstOp
│ │ │ │ └ *colexec.projectInOpBytes
│ │ │ └ *colexecprojconst.projGEFloat64Float64ConstOp
│ │ └ *colexecprojconst.projLEFloat64Float64ConstOp
│ └ *colexecprojconst.projLEInt64Int64ConstOp
├ *colexecbase.constBoolOp
│ └ *colexec.andProjOp
│ ├ *colexec.bufferOp
│ ├ *colexec.andProjOp
│ │ ├ *colexec.andProjOp
│ │ │ ├ *colexec.andProjOp
│ │ │ │ ├ *colexecprojconst.projEQBytesBytesConstOp
│ │ │ │ └ *colexec.projectInOpBytes
│ │ │ └ *colexecprojconst.projGEFloat64Float64ConstOp
│ │ └ *colexecprojconst.projLEFloat64Float64ConstOp
│ └ *colexecprojconst.projLEInt64Int64ConstOp
└ *colexecbase.constBoolOp
└ *colexec.bufferOp
└ *colexec.UnorderedDistinct
└ *colexec.SerialUnorderedSynchronizer
├ *rowexec.joinReader
│ └ *rowexec.joinReader
│ └ *colexec.selectInOpBytes
│ └ *colexecsel.selEQBytesBytesConstOp
│ └ *colexecsel.selLEInt64Int64ConstOp
│ └ *colexecsel.selGEInt64Int64ConstOp
│ └ *colfetcher.ColBatchScan
└ *colexec.UnorderedDistinct
└ *colexec.SerialUnorderedSynchronizer
├ *rowexec.joinReader
│ └ *rowexec.joinReader
│ └ *colexec.selectInOpBytes
│ └ *colexecsel.selEQBytesBytesConstOp
│ └ *colexecsel.selLEInt64Int64ConstOp
│ └ *colexecsel.selGEInt64Int64ConstOp
│ └ *colfetcher.ColBatchScan
└ *rowexec.joinReader
└ *rowexec.joinReader
└ *colexec.selectInOpBytes
└ *colexecsel.selEQBytesBytesConstOp
└ *colexecsel.selLEInt64Int64ConstOp
└ *colexecsel.selGEInt64Int64ConstOp
└ *colfetcher.ColBatchScan

# Query 20
query T
Expand Down
5 changes: 4 additions & 1 deletion pkg/sql/opt/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ type Memo struct {
allowOrdinalColumnReferences bool
useImprovedDisjunctionStats bool
useLimitOrderingForStreamingGroupBy bool
useImprovedSplitDisjunctionForJoins bool

// curRank is the highest currently in-use scalar expression rank.
curRank opt.ScalarRank
Expand Down Expand Up @@ -217,6 +218,7 @@ func (m *Memo) Init(ctx context.Context, evalCtx *eval.Context) {
allowOrdinalColumnReferences: evalCtx.SessionData().AllowOrdinalColumnReferences,
useImprovedDisjunctionStats: evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats,
useLimitOrderingForStreamingGroupBy: evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy,
useImprovedSplitDisjunctionForJoins: evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins,
}
m.metadata.Init()
m.logPropsBuilder.init(ctx, evalCtx, m)
Expand Down Expand Up @@ -356,7 +358,8 @@ func (m *Memo) IsStale(
m.variableInequalityLookupJoinEnabled != evalCtx.SessionData().VariableInequalityLookupJoinEnabled ||
m.allowOrdinalColumnReferences != evalCtx.SessionData().AllowOrdinalColumnReferences ||
m.useImprovedDisjunctionStats != evalCtx.SessionData().OptimizerUseImprovedDisjunctionStats ||
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy {
m.useLimitOrderingForStreamingGroupBy != evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy ||
m.useImprovedSplitDisjunctionForJoins != evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins {
return true, nil
}

Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/opt/memo/memo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,12 @@ func TestMemoIsStale(t *testing.T) {
evalCtx.SessionData().OptimizerUseLimitOrderingForStreamingGroupBy = false
notStale()

// Stale use improved split disjunction for joins.
evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins = true
stale()
evalCtx.SessionData().OptimizerUseImprovedSplitDisjunctionForJoins = false
notStale()

// Stale testing_optimizer_random_seed.
evalCtx.SessionData().TestingOptimizerRandomSeed = 100
stale()
Expand Down
94 changes: 83 additions & 11 deletions pkg/sql/opt/memo/testdata/stats/join
Original file line number Diff line number Diff line change
Expand Up @@ -1648,20 +1648,92 @@ ALTER TABLE uv INJECT STATISTICS '[
opt
SELECT * FROM xysd, uv WHERE (s = 'foo' AND u = 3 AND v = 4) OR (s = 'bar' AND u = 5 AND v = 6)
----
inner-join (cross)
project
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) u:7(int!null) v:8(int!null)
├── stats: [rows=59573.61, distinct(3)=2, null(3)=0, distinct(7)=2, null(7)=0, distinct(8)=2, null(8)=0, distinct(7,8)=2.18365, null(7,8)=0]
├── fd: (1)-->(2-4), (3,4)-->(1,2)
├── scan uv
│ ├── columns: u:7(int) v:8(int!null)
│ └── stats: [rows=10000, distinct(7)=500, null(7)=0, distinct(8)=100, null(8)=0, distinct(7,8)=550, null(7,8)=0]
├── scan xysd
│ ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
│ ├── stats: [rows=5000, distinct(1)=5000, null(1)=0, distinct(3)=10, null(3)=0, distinct(4)=500, null(4)=0]
│ ├── key: (1)
│ └── fd: (1)-->(2-4), (3,4)~~>(1,2)
└── filters
└── (((s:3 = 'foo') AND (u:7 = 3)) AND (v:8 = 4)) OR (((s:3 = 'bar') AND (u:7 = 5)) AND (v:8 = 6)) [type=bool, outer=(3,7,8), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; /7: [/3 - /3] [/5 - /5]; /8: [/4 - /4] [/6 - /6])]
└── distinct-on
├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) u:7(int!null) v:8(int!null) rowid:9(int!null)
├── grouping columns: x:1(int!null) rowid:9(int!null)
├── stats: [rows=16383.64, distinct(1,9)=16383.6, null(1,9)=0]
├── key: (1,9)
├── fd: (1,9)-->(2-4,7,8)
├── union-all
│ ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null) u:7(int!null) v:8(int!null) rowid:9(int!null)
│ ├── left columns: x:12(int) y:13(int) s:14(string) d:15(decimal) u:18(int) v:19(int) rowid:20(int)
│ ├── right columns: x:23(int) y:24(int) s:25(string) d:26(decimal) u:29(int) v:30(int) rowid:31(int)
│ ├── stats: [rows=16383.64, distinct(1,9)=16383.6, null(1,9)=0]
│ ├── inner-join (cross)
│ │ ├── columns: x:12(int!null) y:13(int) s:14(string!null) d:15(decimal!null) u:18(int!null) v:19(int!null) rowid:20(int!null)
│ │ ├── stats: [rows=8191.818, distinct(12,20)=8191.82, null(12,20)=0]
│ │ ├── key: (12,20)
│ │ ├── fd: ()-->(14,18,19), (12)-->(13,15), (15)-->(12,13)
│ │ ├── index-join xysd
│ │ │ ├── columns: x:12(int!null) y:13(int) s:14(string!null) d:15(decimal!null)
│ │ │ ├── stats: [rows=500, distinct(12)=500, null(12)=0, distinct(14)=1, null(14)=0]
│ │ │ ├── key: (12)
│ │ │ ├── fd: ()-->(14), (12)-->(13,15), (15)-->(12,13)
│ │ │ └── scan xysd@xysd_s_d_key
│ │ │ ├── columns: x:12(int!null) s:14(string!null) d:15(decimal!null)
│ │ │ ├── constraint: /-14/15: [/'foo' - /'foo']
│ │ │ ├── stats: [rows=500, distinct(14)=1, null(14)=0]
│ │ │ ├── key: (12)
│ │ │ └── fd: ()-->(14), (12)-->(15), (15)-->(12)
│ │ ├── select
│ │ │ ├── columns: u:18(int!null) v:19(int!null) rowid:20(int!null)
│ │ │ ├── stats: [rows=16.38364, distinct(18)=1, null(18)=0, distinct(19)=1, null(19)=0, distinct(20)=16.3836, null(20)=0, distinct(18,19)=1, null(18,19)=0]
│ │ │ ├── key: (20)
│ │ │ ├── fd: ()-->(18,19)
│ │ │ ├── scan uv
│ │ │ │ ├── columns: u:18(int) v:19(int!null) rowid:20(int!null)
│ │ │ │ ├── stats: [rows=10000, distinct(18)=500, null(18)=0, distinct(19)=100, null(19)=0, distinct(20)=10000, null(20)=0, distinct(18,19)=550, null(18,19)=0]
│ │ │ │ ├── key: (20)
│ │ │ │ └── fd: (20)-->(18,19)
│ │ │ └── filters
│ │ │ ├── u:18 = 3 [type=bool, outer=(18), constraints=(/18: [/3 - /3]; tight), fd=()-->(18)]
│ │ │ └── v:19 = 4 [type=bool, outer=(19), constraints=(/19: [/4 - /4]; tight), fd=()-->(19)]
│ │ └── filters (true)
│ └── inner-join (cross)
│ ├── columns: x:23(int!null) y:24(int) s:25(string!null) d:26(decimal!null) u:29(int!null) v:30(int!null) rowid:31(int!null)
│ ├── stats: [rows=8191.818, distinct(23,31)=8191.82, null(23,31)=0]
│ ├── key: (23,31)
│ ├── fd: ()-->(25,29,30), (23)-->(24,26), (26)-->(23,24)
│ ├── index-join xysd
│ │ ├── columns: x:23(int!null) y:24(int) s:25(string!null) d:26(decimal!null)
│ │ ├── stats: [rows=500, distinct(23)=500, null(23)=0, distinct(25)=1, null(25)=0]
│ │ ├── key: (23)
│ │ ├── fd: ()-->(25), (23)-->(24,26), (26)-->(23,24)
│ │ └── scan xysd@xysd_s_d_key
│ │ ├── columns: x:23(int!null) s:25(string!null) d:26(decimal!null)
│ │ ├── constraint: /-25/26: [/'bar' - /'bar']
│ │ ├── stats: [rows=500, distinct(25)=1, null(25)=0]
│ │ ├── key: (23)
│ │ └── fd: ()-->(25), (23)-->(26), (26)-->(23)
│ ├── select
│ │ ├── columns: u:29(int!null) v:30(int!null) rowid:31(int!null)
│ │ ├── stats: [rows=16.38364, distinct(29)=1, null(29)=0, distinct(30)=1, null(30)=0, distinct(31)=16.3836, null(31)=0, distinct(29,30)=1, null(29,30)=0]
│ │ ├── key: (31)
│ │ ├── fd: ()-->(29,30)
│ │ ├── scan uv
│ │ │ ├── columns: u:29(int) v:30(int!null) rowid:31(int!null)
│ │ │ ├── stats: [rows=10000, distinct(29)=500, null(29)=0, distinct(30)=100, null(30)=0, distinct(31)=10000, null(31)=0, distinct(29,30)=550, null(29,30)=0]
│ │ │ ├── key: (31)
│ │ │ └── fd: (31)-->(29,30)
│ │ └── filters
│ │ ├── u:29 = 5 [type=bool, outer=(29), constraints=(/29: [/5 - /5]; tight), fd=()-->(29)]
│ │ └── v:30 = 6 [type=bool, outer=(30), constraints=(/30: [/6 - /6]; tight), fd=()-->(30)]
│ └── filters (true)
└── aggregations
├── const-agg [as=y:2, type=int, outer=(2)]
│ └── y:2 [type=int]
├── const-agg [as=s:3, type=string, outer=(3)]
│ └── s:3 [type=string]
├── const-agg [as=d:4, type=decimal, outer=(4)]
│ └── d:4 [type=decimal]
├── const-agg [as=u:7, type=int, outer=(7)]
│ └── u:7 [type=int]
└── const-agg [as=v:8, type=int, outer=(8)]
└── v:8 [type=int]

# Test selectivity of ORed join predicates
# Estimate of # rows should be low, and nowhere near the no-stats
Expand Down
Loading