Skip to content

Commit

Permalink
sysvar: introduce variable tidb_enable_inl_join_inner_multi_pattern (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot authored Mar 6, 2023
1 parent 17718a7 commit c7c4686
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 26 deletions.
133 changes: 133 additions & 0 deletions executor/index_advise_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,136 @@ func TestIndexAdvise(t *testing.T) {
require.Equal(t, uint64(4), ia.MaxIndexNum.PerTable)
require.Equal(t, uint64(5), ia.MaxIndexNum.PerDB)
}

// TestIndexJoinProjPattern checks that an inner side of the form
// "projection over data source" is planned as an index join only while
// tidb_enable_inl_join_inner_multi_pattern is ON, that the plan falls back to
// a hash join when it is OFF, and that the UPDATE still produces the expected
// row when executed with the switch ON.
func TestIndexJoinProjPattern(t *testing.T) {
	const (
		enableMultiPattern  = "set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'"
		disableMultiPattern = "set @@session.tidb_enable_inl_join_inner_multi_pattern='OFF'"
	)

	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	// Schema and a single matching row on each side of the join.
	tk.MustExec(`create table t1(
pnbrn_cnaps varchar(5) not null,
new_accno varchar(18) not null,
primary key(pnbrn_cnaps,new_accno) nonclustered
);`)
	tk.MustExec(`create table t2(
pnbrn_cnaps varchar(5) not null,
txn_accno varchar(18) not null,
txn_dt date not null,
yn_frz varchar(1) default null
);`)
	tk.MustExec(`insert into t1(pnbrn_cnaps,new_accno) values ("40001","123")`)
	tk.MustExec(`insert into t2(pnbrn_cnaps, txn_accno, txn_dt, yn_frz) values ("40001","123","20221201","0");`)

	updateSQL := `update
/*+ inl_join(a) */
t2 b,
(
select t1.pnbrn_cnaps,
t1.new_accno
from t1
where t1.pnbrn_cnaps = '40001'
) a
set b.yn_frz = '1'
where b.txn_dt = str_to_date('20221201', '%Y%m%d')
and b.pnbrn_cnaps = a.pnbrn_cnaps
and b.txn_accno = a.new_accno;`
	explainSQL := "explain " + updateSQL

	// Expected operator ids (column 0 of EXPLAIN) for each switch setting.
	indexJoinPlan := [][]interface{}{
		{"Update_8"},
		{"└─IndexJoin_13"},
		{" ├─TableReader_23(Build)"},
		{" │ └─Selection_22"},
		{" │ └─TableFullScan_21"},
		{" └─IndexReader_12(Probe)"},
		{" └─Selection_11"},
		{" └─IndexRangeScan_10"},
	}
	hashJoinPlan := [][]interface{}{
		{"Update_8"},
		{"└─HashJoin_10"},
		{" ├─IndexReader_17(Build)"},
		{" │ └─IndexRangeScan_16"},
		{" └─TableReader_14(Probe)"},
		{" └─Selection_13"},
		{" └─TableFullScan_12"},
	}

	planCases := []struct {
		toggle string
		plan   [][]interface{}
	}{
		{toggle: enableMultiPattern, plan: indexJoinPlan},
		{toggle: disableMultiPattern, plan: hashJoinPlan},
	}
	for _, tc := range planCases {
		tk.MustExec(tc.toggle)
		tk.MustQuery(explainSQL).CheckAt([]int{0}, tc.plan)
	}

	// Run the statement for real with the switch on and verify the update landed.
	tk.MustExec(enableMultiPattern)
	tk.MustExec(updateSQL)
	tk.MustQuery("select yn_frz from t2").Check(testkit.Rows("1"))
}

// TestIndexJoinSelPattern checks that an inner side of the form
// "selection over data source" is planned as an index join only while
// tidb_enable_inl_join_inner_multi_pattern is ON, and that the query returns
// the same row with the switch ON and OFF.
func TestIndexJoinSelPattern(t *testing.T) {
	const (
		enableMultiPattern  = "set @@session.tidb_enable_inl_join_inner_multi_pattern='ON'"
		disableMultiPattern = "set @@session.tidb_enable_inl_join_inner_multi_pattern='OFF'"
		wantRow             = "1 2022-12-01 123 1 2022-12-01 123 1 <nil>"
	)

	store, clean := testkit.CreateMockStore(t)
	defer clean()
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")

	// Schema and a single matching row on each side of the join.
	tk.MustExec(` create table tbl_miss(
id bigint(20) unsigned not null
,txn_dt date default null
,perip_sys_uuid varchar(32) not null
,rvrs_idr varchar(1) not null
,primary key(id) clustered
,key idx1 (txn_dt, perip_sys_uuid, rvrs_idr)
);
`)
	tk.MustExec(`insert into tbl_miss (id,txn_dt,perip_sys_uuid,rvrs_idr) values (1,"20221201","123","1");`)
	tk.MustExec(`create table tbl_src(
txn_dt date default null
,uuid varchar(32) not null
,rvrs_idr char(1)
,expd_inf varchar(5000)
,primary key(uuid,rvrs_idr) nonclustered
);
`)
	tk.MustExec(`insert into tbl_src (txn_dt,uuid,rvrs_idr) values ("20221201","123","1");`)

	selectSQL := `select /*+ use_index(mis,) inl_join(src) */
*
from tbl_miss mis
,tbl_src src
where src.txn_dt >= str_to_date('20221201', '%Y%m%d')
and mis.id between 1 and 10000
and mis.perip_sys_uuid = src.uuid
and mis.rvrs_idr = src.rvrs_idr
and mis.txn_dt = src.txn_dt
and (
case when isnull(src.expd_inf) = 1 then ''
else
substr(concat_ws('',src.expd_inf,'~~'),
instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4,
instr(substr(concat_ws('',src.expd_inf,'~~'),
instr(concat_ws('',src.expd_inf,'~~'),'~~a4') + 4, length(concat_ws('',src.expd_inf,'~~'))),'~~') -1)
end
) != '01';`
	explainSQL := "explain " + selectSQL

	// Expected operator ids (column 0 of EXPLAIN) for each switch setting.
	hashJoinPlan := [][]interface{}{
		{"HashJoin_9"},
		{"├─TableReader_12(Build)"},
		{"│ └─Selection_11"},
		{"│ └─TableRangeScan_10"},
		{"└─Selection_13(Probe)"},
		{" └─TableReader_16"},
		{" └─Selection_15"},
		{" └─TableFullScan_14"},
	}
	indexJoinPlan := [][]interface{}{
		{"IndexJoin_12"},
		{"├─TableReader_23(Build)"},
		{"│ └─Selection_22"},
		{"│ └─TableRangeScan_21"},
		{"└─IndexLookUp_11(Probe)"},
		{" ├─IndexRangeScan_8(Build)"},
		{" └─Selection_10(Probe)"},
		{" └─TableRowIDScan_9"},
	}

	planCases := []struct {
		toggle string
		plan   [][]interface{}
	}{
		{toggle: disableMultiPattern, plan: hashJoinPlan},
		{toggle: enableMultiPattern, plan: indexJoinPlan},
	}
	for _, tc := range planCases {
		tk.MustExec(tc.toggle)
		tk.MustQuery(explainSQL).CheckAt([]int{0}, tc.plan)
	}

	// The result set must not depend on which join algorithm was chosen.
	for _, toggle := range []string{enableMultiPattern, disableMultiPattern} {
		tk.MustExec(toggle)
		tk.MustQuery(selectSQL).Check(testkit.Rows(wantRow))
	}
}
141 changes: 115 additions & 26 deletions planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,33 +685,77 @@ func (p *LogicalJoin) getIndexJoinByOuterIdx(prop *property.PhysicalProperty, ou
} else {
innerJoinKeys, outerJoinKeys, _, _ = p.GetJoinKeys()
}
ds, isDataSource := innerChild.(*DataSource)
us, isUnionScan := innerChild.(*LogicalUnionScan)
if (!isDataSource && !isUnionScan) || (isDataSource && ds.preferStoreType&preferTiFlash != 0) {
innerChildWrapper := p.extractIndexJoinInnerChildPattern(innerChild)
if innerChildWrapper == nil {
return nil
}
if isUnionScan {
// The child of union scan may be union all for partition table.
ds, isDataSource = us.Children()[0].(*DataSource)
var avgInnerRowCnt float64
if outerChild.statsInfo().RowCount > 0 {
avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount
}
joins = p.buildIndexJoinInner2TableScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt)
if joins != nil {
return
}
return p.buildIndexJoinInner2IndexScan(prop, innerChildWrapper, innerJoinKeys, outerJoinKeys, outerIdx, avgInnerRowCnt)
}

// indexJoinInnerChildWrapper bundles the logical operators recognized on the
// inner side of an index join by extractIndexJoinInnerChildPattern.
type indexJoinInnerChildWrapper struct {
// ds is the DataSource at the bottom of the matched pattern.
ds *DataSource
// us is set when the inner child is a LogicalUnionScan; nil otherwise.
us *LogicalUnionScan
// proj is set when the inner child is a LogicalProjection; nil otherwise.
proj *LogicalProjection
// sel is set when the inner child is a LogicalSelection; nil otherwise.
sel *LogicalSelection
}

// extractIndexJoinInnerChildPattern inspects innerChild and, when its shape is
// one that is accepted as the inner side of an index join, returns a wrapper
// holding the matched operators; otherwise it returns nil.
//
// Shapes handled by the type switch below:
//   - *DataSource
//   - *LogicalUnionScan whose first child is a *DataSource
//   - *LogicalProjection made only of *expression.Column expressions whose
//     first child is a *DataSource
//   - *LogicalSelection whose first child is a *DataSource
//
// The projection and selection shapes are accepted only when the session
// variable EnableINLJoinInnerMultiPattern is on. A match is rejected when the
// resolved DataSource has the preferTiFlash bit set in preferStoreType.
//
// NOTE(review): this listing appears to interleave lines removed by the commit
// with the added ones (the loop over `us.Children()` and the statements near
// the end that reference `outerChild`, `joins` and `prop` use names that are
// not declared in this function) — confirm against the real file.
func (p *LogicalJoin) extractIndexJoinInnerChildPattern(innerChild LogicalPlan) *indexJoinInnerChildWrapper {
wrapper := &indexJoinInnerChildWrapper{}
switch child := innerChild.(type) {
case *DataSource:
wrapper.ds = child
case *LogicalUnionScan:
wrapper.us = child
ds, isDataSource := wrapper.us.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
// If one of the union scan children is a TiFlash table, then we can't choose index join.
for _, child := range us.Children() {
for _, child := range wrapper.us.Children() {
if ds, ok := child.(*DataSource); ok && ds.preferStoreType&preferTiFlash != 0 {
return nil
}
}
case *LogicalProjection:
if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern {
return nil
}
// For now, only a projection whose expressions are all plain Columns may be the inner side of an index join.
for _, expr := range child.Exprs {
if _, ok := expr.(*expression.Column); !ok {
return nil
}
}
wrapper.proj = child
ds, isDataSource := wrapper.proj.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
case *LogicalSelection:
if !p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern {
return nil
}
wrapper.sel = child
ds, isDataSource := wrapper.sel.Children()[0].(*DataSource)
if !isDataSource {
return nil
}
wrapper.ds = ds
}
var avgInnerRowCnt float64
if outerChild.statsInfo().RowCount > 0 {
avgInnerRowCnt = p.equalCondOutCnt / outerChild.statsInfo().RowCount
}
joins = p.buildIndexJoinInner2TableScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt)
if joins != nil {
return
// Reject when no DataSource was resolved (innerChild matched none of the cases) or it has the preferTiFlash bit set.
if wrapper.ds == nil || wrapper.ds.preferStoreType&preferTiFlash != 0 {
return nil
}
return p.buildIndexJoinInner2IndexScan(prop, ds, innerJoinKeys, outerJoinKeys, outerIdx, us, avgInnerRowCnt)
return wrapper
}

func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*expression.Column, checkPathValid func(path *util.AccessPath) bool, outerJoinKeys []*expression.Column) (*indexJoinBuildHelper, []int) {
Expand Down Expand Up @@ -751,8 +795,10 @@ func (p *LogicalJoin) getIndexJoinBuildHelper(ds *DataSource, innerJoinKeys []*e
// fetched from the inner side for every tuple from the outer side. This will be
// promised to be no worse than building IndexScan as the inner child.
func (p *LogicalJoin) buildIndexJoinInner2TableScan(
prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) {
ds := wrapper.ds
us := wrapper.us
var tblPath *util.AccessPath
for _, path := range ds.possibleAccessPaths {
if path.IsTablePath() && path.StoreType == kv.TiKV {
Expand All @@ -773,13 +819,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
if helper == nil {
return nil
}
innerTask = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, false, false, avgInnerRowCnt)
innerTask = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, false, false, avgInnerRowCnt)
// The index merge join's inner plan is different from index join, so we
// should construct another inner plan for it.
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 = p.constructInnerTableScanTask(ds, helper.chosenRanges.Range(), outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
innerTask2 = p.constructInnerTableScanTask(wrapper, helper.chosenRanges.Range(), outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
}
ranges = helper.chosenRanges
} else {
Expand All @@ -803,13 +849,13 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
return nil
}
ranges := ranger.FullIntRange(mysql.HasUnsignedFlag(pkCol.RetType.GetFlag()))
innerTask = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, false, false, avgInnerRowCnt)
innerTask = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, false, false, avgInnerRowCnt)
// The index merge join's inner plan is different from index join, so we
// should construct another inner plan for it.
// Because we can't keep order for union scan, if there is a union scan in inner task,
// we can't construct index merge join.
if us == nil {
innerTask2 = p.constructInnerTableScanTask(ds, ranges, outerJoinKeys, us, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
innerTask2 = p.constructInnerTableScanTask(wrapper, ranges, outerJoinKeys, true, !prop.IsSortItemEmpty() && prop.SortItems[0].Desc, avgInnerRowCnt)
}
}
var (
Expand Down Expand Up @@ -837,8 +883,10 @@ func (p *LogicalJoin) buildIndexJoinInner2TableScan(
}

func (p *LogicalJoin) buildIndexJoinInner2IndexScan(
prop *property.PhysicalProperty, ds *DataSource, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, us *LogicalUnionScan, avgInnerRowCnt float64) (joins []PhysicalPlan) {
prop *property.PhysicalProperty, wrapper *indexJoinInnerChildWrapper, innerJoinKeys, outerJoinKeys []*expression.Column,
outerIdx int, avgInnerRowCnt float64) (joins []PhysicalPlan) {
ds := wrapper.ds
us := wrapper.us
helper, keyOff2IdxOff := p.getIndexJoinBuildHelper(ds, innerJoinKeys, func(path *util.AccessPath) bool { return !path.IsTablePath() }, outerJoinKeys)
if helper == nil {
return nil
Expand Down Expand Up @@ -925,14 +973,14 @@ func (ijHelper *indexJoinBuildHelper) buildRangeDecidedByInformation(idxCols []*

// constructInnerTableScanTask is specially used to construct the inner plan for PhysicalIndexJoin.
func (p *LogicalJoin) constructInnerTableScanTask(
ds *DataSource,
wrapper *indexJoinInnerChildWrapper,
ranges ranger.Ranges,
outerJoinKeys []*expression.Column,
us *LogicalUnionScan,
keepOrder bool,
desc bool,
rowCount float64,
) task {
ds := wrapper.ds
// If `ds.tableInfo.GetPartitionInfo() != nil`,
// it means the data source is a partition table reader.
// If the inner task need to keep order, the partition table reader can't satisfy it.
Expand Down Expand Up @@ -997,10 +1045,51 @@ func (p *LogicalJoin) constructInnerTableScanTask(
ts.addPushedDownSelection(copTask, selStats)
t := copTask.convertToRootTask(ds.ctx)
reader := t.p
t.p = p.constructInnerUnionScan(us, reader)
t.p = p.constructInnerByWrapper(wrapper, reader)
return t
}

// constructInnerByWrapper puts the physical counterpart of the operator
// recorded in wrapper on top of child and returns the resulting plan.
//
// A union scan is always rebuilt when present. A projection or selection is
// rebuilt only when the session variable EnableINLJoinInnerMultiPattern is
// on. If nothing applies, child is returned unchanged.
func (p *LogicalJoin) constructInnerByWrapper(wrapper *indexJoinInnerChildWrapper, child PhysicalPlan) PhysicalPlan {
	multiPatternEnabled := p.ctx.GetSessionVars().EnableINLJoinInnerMultiPattern
	switch {
	case wrapper.us != nil:
		// Union scan takes precedence regardless of the switch.
		return p.constructInnerUnionScan(wrapper.us, child)
	case !multiPatternEnabled:
		return child
	case wrapper.proj != nil:
		return p.constructInnerProj(wrapper.proj, child)
	case wrapper.sel != nil:
		return p.constructInnerSel(wrapper.sel, child)
	default:
		return child
	}
}

// constructInnerSel builds a PhysicalSelection carrying sel's conditions,
// initialized from sel's context, stats and block offset, and attaches child
// as its only child. When sel is nil, child is returned as is.
func (p *LogicalJoin) constructInnerSel(sel *LogicalSelection, child PhysicalPlan) PhysicalPlan {
	if sel == nil {
		return child
	}
	var tmpl PhysicalSelection
	tmpl.Conditions = sel.Conditions
	filter := tmpl.Init(sel.ctx, sel.stats, sel.blockOffset, nil)
	filter.SetChildren(child)
	return filter
}

// constructInnerProj builds a PhysicalProjection that copies proj's
// expressions and its CalculateNoDelay / AvoidColumnEvaluator settings,
// initialized from proj's context, stats and block offset, and attaches child
// as its only child. When proj is nil, child is returned as is.
func (p *LogicalJoin) constructInnerProj(proj *LogicalProjection, child PhysicalPlan) PhysicalPlan {
	if proj == nil {
		return child
	}
	var tmpl PhysicalProjection
	tmpl.Exprs = proj.Exprs
	tmpl.CalculateNoDelay = proj.CalculateNoDelay
	tmpl.AvoidColumnEvaluator = proj.AvoidColumnEvaluator
	projection := tmpl.Init(proj.ctx, proj.stats, proj.blockOffset, nil)
	projection.SetChildren(child)
	return projection
}

func (p *LogicalJoin) constructInnerUnionScan(us *LogicalUnionScan, reader PhysicalPlan) PhysicalPlan {
if us == nil {
return reader
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,9 @@ type SessionVars struct {
// When it is false, ANALYZE reads the latest data.
// When it is true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
EnableAnalyzeSnapshot bool

// EnableINLJoinInnerMultiPattern indicates whether to enable multiple patterns for the inner side of an index join.
EnableINLJoinInnerMultiPattern bool
}

// InitStatementContext initializes a StatementContext, the object is reused to reduce allocation.
Expand Down
9 changes: 9 additions & 0 deletions sessionctx/variable/sysvar.go
Original file line number Diff line number Diff line change
Expand Up @@ -1584,6 +1584,15 @@ var defaultSysVars = []*SysVar{
s.EnableAnalyzeSnapshot = TiDBOptOn(val)
return nil
}},
{Scope: ScopeGlobal | ScopeSession, Name: TiDBEnableINLJoinInnerMultiPattern, Value: BoolToOnOff(false), Type: TypeBool,
SetSession: func(s *SessionVars, val string) error {
s.EnableINLJoinInnerMultiPattern = TiDBOptOn(val)
return nil
},
GetSession: func(s *SessionVars) (string, error) {
return BoolToOnOff(s.EnableINLJoinInnerMultiPattern), nil
},
},
}

// FeedbackProbability points to the FeedbackProbability in statistics package.
Expand Down
3 changes: 3 additions & 0 deletions sessionctx/variable/tidb_vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,9 @@ const (
// When set to false, ANALYZE reads the latest data.
// When set to true, ANALYZE reads data on the snapshot at the beginning of ANALYZE.
TiDBEnableAnalyzeSnapshot = "tidb_enable_analyze_snapshot"

// TiDBEnableINLJoinInnerMultiPattern indicates whether to enable multiple patterns for the inner side of an index join.
TiDBEnableINLJoinInnerMultiPattern = "tidb_enable_inl_join_inner_multi_pattern"
)

// TiDB vars that have only global scope
Expand Down

0 comments on commit c7c4686

Please sign in to comment.