From c7c468610d7f97b9e0e919d7bbb32ed3379fdab3 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Mon, 6 Mar 2023 17:21:11 +0800 Subject: [PATCH] sysvar: introduce variable tidb_enable_inl_join_inner_multi_pattern (#41319) (#41326) ref pingcap/tidb#40505 --- executor/index_advise_test.go | 133 +++++++++++++++++++++++ planner/core/exhaust_physical_plans.go | 141 ++++++++++++++++++++----- sessionctx/variable/session.go | 3 + sessionctx/variable/sysvar.go | 9 ++ sessionctx/variable/tidb_vars.go | 3 + 5 files changed, 263 insertions(+), 26 deletions(-) diff --git a/executor/index_advise_test.go b/executor/index_advise_test.go index 5371ecd051bc1..b0f93bd2b48ad 100644 --- a/executor/index_advise_test.go +++ b/executor/index_advise_test.go @@ -69,3 +69,136 @@ func TestIndexAdvise(t *testing.T) { require.Equal(t, uint64(4), ia.MaxIndexNum.PerTable) require.Equal(t, uint64(5), ia.MaxIndexNum.PerDB) } + +func TestIndexJoinProjPattern(t *testing.T) { + store, clean := testkit.CreateMockStore(t) + defer clean() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(`create table t1( +pnbrn_cnaps varchar(5) not null, +new_accno varchar(18) not null, +primary key(pnbrn_cnaps,new_accno) nonclustered +);`) + tk.MustExec(`create table t2( +pnbrn_cnaps varchar(5) not null, +txn_accno varchar(18) not null, +txn_dt date not null, +yn_frz varchar(1) default null +);`) + tk.MustExec(`insert into t1(pnbrn_cnaps,new_accno) values ("40001","123")`) + tk.MustExec(`insert into t2(pnbrn_cnaps, txn_accno, txn_dt, yn_frz) values ("40001","123","20221201","0");`) + + sql := `update +/*+ inl_join(a) */ +t2 b, +( +select t1.pnbrn_cnaps, +t1.new_accno +from t1 +where t1.pnbrn_cnaps = '40001' +) a +set b.yn_frz = '1' +where b.txn_dt = str_to_date('20221201', '%Y%m%d') +and b.pnbrn_cnaps = a.pnbrn_cnaps +and b.txn_accno = a.new_accno;` + rows := [][]interface{}{ + {"Update_8"}, + {"└─IndexJoin_13"}, + {" ├─TableReader_23(Build)"}, + {" │ └─Selection_22"}, + {" │ └─TableFullScan_21"}, + {" └─IndexReader_12(Probe)"}, + {" └─Selection_11"}, + {" └─IndexRangeScan_10"}, + } + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'") + tk.MustQuery("explain "+sql).CheckAt([]int{0}, rows) + rows = [][]interface{}{ + {"Update_8"}, + {"└─HashJoin_10"}, + {" ├─IndexReader_17(Build)"}, + {" │ └─IndexRangeScan_16"}, + {" └─TableReader_14(Probe)"}, + {" └─Selection_13"}, + {" └─TableFullScan_12"}, + } + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='OFF'") + tk.MustQuery("explain "+sql).CheckAt([]int{0}, rows) + + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'") + tk.MustExec(sql) + tk.MustQuery("select yn_frz from t2").Check(testkit.Rows("1")) +} + +func TestIndexJoinSelPattern(t *testing.T) { + store, clean := testkit.CreateMockStore(t) + defer clean() + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec(` create table tbl_miss( +id bigint(20) unsigned not null +,txn_dt date default null +,perip_sys_uuid varchar(32) not null +,rvrs_idr varchar(1) not null +,primary key(id) clustered +,key idx1 (txn_dt, perip_sys_uuid, rvrs_idr) +); +`) + tk.MustExec(`insert into tbl_miss (id,txn_dt,perip_sys_uuid,rvrs_idr) values (1,"20221201","123","1");`) + tk.MustExec(`create table tbl_src( +txn_dt date default null +,uuid varchar(32) not null +,rvrs_idr char(1) +,expd_inf varchar(5000) +,primary key(uuid,rvrs_idr) nonclustered +); +`) + tk.MustExec(`insert into tbl_src (txn_dt,uuid,rvrs_idr) values ("20221201","123","1");`) + sql := `select /*+ use_index(mis,) inl_join(src) */ + * + from tbl_miss mis + ,tbl_src src + where src.txn_dt >= str_to_date('20221201', '%Y%m%d') + and mis.id between 1 and 10000 + and mis.perip_sys_uuid = src.uuid + and mis.rvrs_idr = src.rvrs_idr + and mis.txn_dt = src.txn_dt + and ( + case when isnull(src.expd_inf) = 1 then '' + else + substr(concat_ws('',src.expd_inf,'~~'), + instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4, + instr(substr(concat_ws('',src.expd_inf,'~~'), + instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4, length(concat_ws('',src.expd_inf,'~~'))),'~~') -1) + end + ) != '01';` + rows := [][]interface{}{ + {"HashJoin_9"}, + {"├─TableReader_12(Build)"}, + {"│ └─Selection_11"}, + {"│ └─TableRangeScan_10"}, + {"└─Selection_13(Probe)"}, + {" └─TableReader_16"}, + {" └─Selection_15"}, + {" └─TableFullScan_14"}, + } + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='OFF'") + tk.MustQuery("explain "+sql).CheckAt([]int{0}, rows) + rows = [][]interface{}{ + {"IndexJoin_12"}, + {"├─TableReader_23(Build)"}, + {"│ └─Selection_22"}, + {"│ └─TableRangeScan_21"}, + {"└─IndexLookUp_11(Probe)"}, + {" ├─IndexRangeScan_8(Build)"}, + {" └─Selection_10(Probe)"}, + {" └─TableRowIDScan_9"}, + } + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'") + tk.MustQuery("explain "+sql).CheckAt([]int{0}, rows) + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'") + tk.MustQuery(sql).Check(testkit.Rows("1 2022-12-01 123 1 2022-12-01 123 1 ")) + tk.MustExec("set @@session.tidb_enable_inl_join_inner_multi_pattern='OFF'") + tk.MustQuery(sql).Check(testkit.Rows("1 2022-12-01 123 1 2022-12-01 123 1 ")) +} diff --git a/planner/core/exhaust_physical_plans.go b/planner/core/exhaust_physical_plans.go index fecce00ced731..9d7653d29a277 100644 --- a/planner/core/exhaust_physical_plans.go +++ b/planner/core/exhaust_physical_plans.go @@ -685,33 +685,77 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou } else { innerJoinKeys, outerJoinKeys, _, _ = p.GetJoinKeys() } - ds, isDataSource := innerChild.(*DataSource) - us, isUnionScan := innerChild.(*LogicalUnionScan) - if (!isDataSource && !isUnionScan) || (isDataSource && ds.preferStoreType&preferTiFlash != 0) { + innerChildWrapper := p.extractIndexJoinInnerChildPattern(innerChild) + if innerChildWrapper == nil { return nil } - if isUnionScan { - // The child of union scan may be union all for partition table. - ds, isDataSource = us.Children()[0].(*DataSource) + var avgInnerRowCnt float64 + if outerChild.statsInfo().RowCount > 0 { + avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount + } + joins = p.buildIndexJoinInner2TableScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt) + if joins != nil { + return + } + return p.buildIndexJoinInner2IndexScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt) +} + +type indexJoinInnerChildWrapper struct { + ds *DataSource + us *LogicalUnionScan + proj *LogicalProjection + sel *LogicalSelection +} + +func (p *LogicalJoin) extractIndexJoinInnerChildPattern(innerChild LogicalPlan) *indexJoinInnerChildWrapper { + wrapper := &indexJoinInnerChildWrapper{} + switch child := innerChild.(type) { + case *DataSource: + wrapper.ds = child + case *LogicalUnionScan: + wrapper.us = child + ds, isDataSource := wrapper.us.Children()[0].(*DataSource) if !isDataSource { return nil } + wrapper.ds = ds // If one of the union scan children is a TiFlash table, then we can't choose index join. - for _, child := range us.Children() { + for _, child := range wrapper.us.Children() { if ds, ok := child.(*DataSource); ok && ds.preferStoreType&preferTiFlash != 0 { return nil } } + case *LogicalProjection: + if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern { + return nil + } + // For now, we only allow proj with all Column expression can be the inner side of index join + for _, expr := range child.Exprs { + if _, ok := expr.(*expression.Column); !ok { + return nil + } + } + wrapper.proj = child + ds, isDataSource := wrapper.proj.Children()[0].(*DataSource) + if !isDataSource { + return nil + } + wrapper.ds = ds + case *LogicalSelection: + if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern { + return nil + } + wrapper.sel = child + ds, isDataSource := wrapper.sel.Children()[0].(*DataSource) + if !isDataSource { + return nil + } + wrapper.ds = ds } - var avgInnerRowCnt float64 - if outerChild.statsInfo().RowCount > 0 { - avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount - } - joins = p.buildIndexJoinInner2TableScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt) - if joins != nil { - return + if wrapper.ds == nil || wrapper.ds.preferStoreType&preferTiFlash != 0 { + return nil } - return p.buildIndexJoinInner2IndexScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt) + return wrapper } func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*expression.Column, checkPathValid func(path *util.AccessPath) bool, outerJoinKeys []*expression.Column) (*indexJoinBuildHelper, []int) { @@ -751,8 +795,10 @@ func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*e // fetched from the inner side for every tuple from the outer side. This will be // promised to be no worse than building IndexScan as the inner child. func (p *LogicalJoin) buildIndexJoinInner2TableScan( - prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column, - outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) { + prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column, + outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) { + ds := wrapper.ds + us := wrapper.us var tblPath *util.AccessPath for _, path := range ds.possibleAccessPaths { if path.IsTablePath() && path.StoreType == kv.TiKV { @@ -773,13 +819,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan( if helper == nil { return nil } - innerTask = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, false, false, avgInnerRowCnt) + innerTask = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, false, false, avgInnerRowCnt) // The index merge join's inner plan is different from index join, so we // should construct another inner plan for it. // Because we can't keep order for union scan, if there is a union scan in inner task, // we can't construct index merge join. if us == nil { - innerTask2 = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt) + innerTask2 = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt) } ranges = helper.chosenRanges } else { @@ -803,13 +849,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan( return nil } ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.GetFlag())) - innerTask = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, false, false, avgInnerRowCnt) + innerTask = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, false, false, avgInnerRowCnt) // The index merge join's inner plan is different from index join, so we // should construct another inner plan for it. // Because we can't keep order for union scan, if there is a union scan in inner task, // we can't construct index merge join. if us == nil { - innerTask2 = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt) + innerTask2 = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt) } } var ( @@ -837,8 +883,10 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan( } func (p *LogicalJoin) buildIndexJoinInner2IndexScan( - prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column, - outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) { + prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column, + outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) { + ds := wrapper.ds + us := wrapper.us helper, keyOff2IdxOff := p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return !path.IsTablePath() }, outerJoinKeys) if helper == nil { return nil @@ -925,14 +973,14 @@ func (ijHelper *indexJoinBuildHelper) buildRangeDecidedByInformation(idxCols []* // constructInnerTableScanTask is specially used to construct the inner plan for PhysicalIndexJoin. func (p *LogicalJoin) constructInnerTableScanTask( - ds *DataSource, + wrapper *indexJoinInnerChildWrapper, ranges ranger.Ranges, outerJoinKeys []*expression.Column, - us *LogicalUnionScan, keepOrder bool, desc bool, rowCount float64, ) task { + ds := wrapper.ds // If `ds.tableInfo.GetPartitionInfo() != nil`, // it means the data source is a partition table reader. // If the inner task need to keep order, the partition table reader can't satisfy it. @@ -997,10 +1045,51 @@ func (p *LogicalJoin) constructInnerTableScanTask( ts.addPushedDownSelection(copTask, selStats) t := copTask.convertToRootTask(ds.ctx) reader := t.p - t.p = p.constructInnerUnionScan(us, reader) + t.p = p.constructInnerByWrapper(wrapper, reader) return t } +func (p *LogicalJoin) constructInnerByWrapper(wrapper *indexJoinInnerChildWrapper, child PhysicalPlan) PhysicalPlan { + if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern { + if wrapper.us != nil { + return p.constructInnerUnionScan(wrapper.us, child) + } + return child + } + if wrapper.us != nil { + return p.constructInnerUnionScan(wrapper.us, child) + } else if wrapper.proj != nil { + return p.constructInnerProj(wrapper.proj, child) + } else if wrapper.sel != nil { + return p.constructInnerSel(wrapper.sel, child) + } + return child +} + +func (p *LogicalJoin) constructInnerSel(sel *LogicalSelection, child PhysicalPlan) PhysicalPlan { + if sel == nil { + return child + } + physicalSel := PhysicalSelection{ + Conditions: sel.Conditions, + }.Init(sel.ctx, sel.stats, sel.blockOffset, nil) + physicalSel.SetChildren(child) + return physicalSel +} + +func (p *LogicalJoin) constructInnerProj(proj *LogicalProjection, child PhysicalPlan) PhysicalPlan { + if proj == nil { + return child + } + physicalProj := PhysicalProjection{ + Exprs: proj.Exprs, + CalculateNoDelay: proj.CalculateNoDelay, + AvoidColumnEvaluator: proj.AvoidColumnEvaluator, + }.Init(proj.ctx, proj.stats, proj.blockOffset, nil) + physicalProj.SetChildren(child) + return physicalProj +} + func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader PhysicalPlan) PhysicalPlan { if us == nil { return reader diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index f480365321b94..49cb29bd07cf6 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -1055,6 +1055,9 @@ type SessionVars struct { // When it is false, ANALYZE reads the latest data. // When it is true, ANALYZE reads data on the snapshot at the beginning of ANALYZE. EnableAnalyzeSnapshot bool + + // EnableINLJoinInnerMultiPattern indicates whether enable multi pattern for index join inner side + EnableINLJoinInnerMultiPattern bool } // InitStatementContext initializes a StatementContext, the object is reused to reduce allocation. diff --git a/sessionctx/variable/sysvar.go b/sessionctx/variable/sysvar.go index 1a1944f7abd01..c242df95612f1 100644 --- a/sessionctx/variable/sysvar.go +++ b/sessionctx/variable/sysvar.go @@ -1584,6 +1584,15 @@ var defaultSysVars = []*SysVar{ s.EnableAnalyzeSnapshot = TiDBOptOn(val) return nil }}, + {Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableINLJoinInnerMultiPattern, Value: BoolToOnOff(false), Type: TypeBool, + SetSession: func(s *SessionVars, val string) error { + s.EnableINLJoinInnerMultiPattern = TiDBOptOn(val) + return nil + }, + GetSession: func(s *SessionVars) (string, error) { + return BoolToOnOff(s.EnableINLJoinInnerMultiPattern), nil + }, + }, } // FeedbackProbability points to the FeedbackProbability in statistics package. diff --git a/sessionctx/variable/tidb_vars.go b/sessionctx/variable/tidb_vars.go index 59ac2dc6a1b6c..679fd26e0ba4b 100644 --- a/sessionctx/variable/tidb_vars.go +++ b/sessionctx/variable/tidb_vars.go @@ -652,6 +652,9 @@ const ( // When set to false, ANALYZE reads the latest data. // When set to true, ANALYZE reads data on the snapshot at the beginning of ANALYZE. TiDBEnableAnalyzeSnapshot = "tidb_enable_analyze_snapshot" + + // TiDBEnableINLJoinInnerMultiPattern indicates whether enable multi pattern for inner side of inl join + TiDBEnableINLJoinInnerMultiPattern = "tidb_enable_inl_join_inner_multi_pattern" ) // TiDB vars that have only global scope