Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: support hash v2 variable to control the join order #58546

Merged
merged 10 commits into from
Dec 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/executor/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ go_library(
"//pkg/executor/internal/util",
"//pkg/executor/internal/vecgroupchecker",
"//pkg/executor/join",
"//pkg/executor/join/joinversion",
"//pkg/executor/lockstats",
"//pkg/executor/metrics",
"//pkg/executor/sortexec",
Expand Down
3 changes: 2 additions & 1 deletion pkg/executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"github.com/pingcap/tidb/pkg/executor/internal/testutil"
"github.com/pingcap/tidb/pkg/executor/internal/vecgroupchecker"
"github.com/pingcap/tidb/pkg/executor/join"
"github.com/pingcap/tidb/pkg/executor/join/joinversion"
"github.com/pingcap/tidb/pkg/executor/lockstats"
executor_metrics "github.com/pingcap/tidb/pkg/executor/metrics"
"github.com/pingcap/tidb/pkg/executor/sortexec"
Expand Down Expand Up @@ -1748,7 +1749,7 @@ func (b *executorBuilder) buildHashJoinV2(v *plannercore.PhysicalHashJoin) exec.
}

func (b *executorBuilder) buildHashJoin(v *plannercore.PhysicalHashJoin) exec.Executor {
if b.ctx.GetSessionVars().UseHashJoinV2 && join.IsHashJoinV2Supported() && v.CanUseHashJoinV2() {
if b.ctx.GetSessionVars().UseHashJoinV2 && joinversion.IsHashJoinV2Supported() && v.CanUseHashJoinV2() {
return b.buildHashJoinV2(v)
}
leftExec := b.build(v.Children()[0])
Expand Down
8 changes: 0 additions & 8 deletions pkg/executor/join/hash_join_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,6 @@ var (
HashJoinV2Strings = []string{DisableHashJoinV2, EnableHashJoinV2}
)

// IsHashJoinV2Supported returns true if hash join v2 is supported in the current environment.
func IsHashJoinV2Supported() bool {
// sizeOfUintptr should always be equal to sizeOfUnsafePointer because, according to Go's documentation,
// a Pointer can be converted to a uintptr. The size check is kept here in case a future Go runtime
// changes this.
return !heapObjectsCanMove() && sizeOfUintptr >= sizeOfUnsafePointer
}

type hashTableContext struct {
// rowTables is used during split partition stage, each buildWorker has
// its own rowTable
Expand Down
17 changes: 17 additions & 0 deletions pkg/executor/join/joinversion/join_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ package joinversion

import (
"strings"
"unsafe"
)

// heapObjectsCanMove is a body-less declaration bound to runtime.heapObjectsCanMove
// through the go:linkname directive below. IsHashJoinV2Supported treats a true
// result as "hash join v2 is not supported".
//
//go:linkname heapObjectsCanMove runtime.heapObjectsCanMove
func heapObjectsCanMove() bool

const (
// HashJoinVersionLegacy means hash join v1
HashJoinVersionLegacy = "legacy"
Expand All @@ -43,3 +47,16 @@ func init() {
func IsOptimizedVersion(hashJoinVersion string) bool {
return strings.ToLower(hashJoinVersion) == HashJoinVersionOptimized
}

const (
// sizeOfUintptr is the size of a uintptr value as reported by unsafe.Sizeof.
sizeOfUintptr = int(unsafe.Sizeof(uintptr(0)))
// sizeOfUnsafePointer is the size of an unsafe.Pointer value as reported by unsafe.Sizeof.
sizeOfUnsafePointer = int(unsafe.Sizeof(unsafe.Pointer(nil)))
)

// IsHashJoinV2Supported reports whether hash join v2 can be used in the current environment.
func IsHashJoinV2Supported() bool {
	// Hash join v2 is not supported when the runtime reports that heap objects can move.
	if heapObjectsCanMove() {
		return false
	}
	// A uintptr is expected to always be as large as an unsafe.Pointer, because Go documents
	// that a Pointer can be converted to a uintptr. The comparison is kept as a safeguard in
	// case a future Go runtime changes this.
	return sizeOfUintptr >= sizeOfUnsafePointer
}
Original file line number Diff line number Diff line change
Expand Up @@ -1658,29 +1658,32 @@
{
"SQL": "EXPLAIN select /*+ HASH_JOIN_BUILD(s@sel_2) */ * from t where t.a not in (select s.a from s where t.c > s.c); -- 9. anti semi join, one join key + other condition + hint",
"Plan": [
"TableReader_28 8000.00 root MppVersion: 2, data:ExchangeSender_27",
"└─ExchangeSender_27 8000.00 mpp[tiflash] ExchangeType: PassThrough",
" └─HashJoin_22 8000.00 mpp[tiflash] Null-aware anti semi join, left side:TableFullScan_23, equal:[eq(test.t.a, test.s.a)], other cond:gt(test.t.c, test.s.c)",
" ├─ExchangeReceiver_26(Build) 10000.00 mpp[tiflash] ",
" │ └─ExchangeSender_25 10000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST",
" │ └─TableFullScan_24 10000.00 mpp[tiflash] table:s keep order:false, stats:pseudo",
" └─TableFullScan_23(Probe) 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo"
"TableReader_30 8000.00 root MppVersion: 2, data:ExchangeSender_29",
"└─ExchangeSender_29 8000.00 mpp[tiflash] ExchangeType: PassThrough",
" └─HashJoin_28 8000.00 mpp[tiflash] Null-aware anti semi join, left side:TableFullScan_12, equal:[eq(test.t.a, test.s.a)], other cond:gt(test.t.c, test.s.c)",
" ├─ExchangeReceiver_15(Build) 10000.00 mpp[tiflash] ",
" │ └─ExchangeSender_14 10000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST",
" │ └─TableFullScan_13 10000.00 mpp[tiflash] table:s keep order:false, stats:pseudo",
" └─TableFullScan_12(Probe) 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo"
],
"Warn": null
"Warn": [
"[planner:1815]We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for anti semi join, please check the hint"
]
},
{
"SQL": "EXPLAIN select /*+ HASH_JOIN_BUILD(t@sel_1) */ * from t where t.a not in (select s.a from s where t.c > s.c); -- 10. anti semi join, one join key + other condition + hint",
"Plan": [
"TableReader_28 8000.00 root MppVersion: 2, data:ExchangeSender_27",
"└─ExchangeSender_27 8000.00 mpp[tiflash] ExchangeType: PassThrough",
" └─HashJoin_22 8000.00 mpp[tiflash] Null-aware anti semi join, left side:TableFullScan_23, equal:[eq(test.t.a, test.s.a)], other cond:gt(test.t.c, test.s.c)",
" ├─ExchangeReceiver_26(Build) 10000.00 mpp[tiflash] ",
" │ └─ExchangeSender_25 10000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST",
" │ └─TableFullScan_24 10000.00 mpp[tiflash] table:s keep order:false, stats:pseudo",
" └─TableFullScan_23(Probe) 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo"
"TableReader_30 8000.00 root MppVersion: 2, data:ExchangeSender_29",
"└─ExchangeSender_29 8000.00 mpp[tiflash] ExchangeType: PassThrough",
" └─HashJoin_28 8000.00 mpp[tiflash] Null-aware anti semi join, left side:TableFullScan_12, equal:[eq(test.t.a, test.s.a)], other cond:gt(test.t.c, test.s.c)",
" ├─ExchangeReceiver_15(Build) 10000.00 mpp[tiflash] ",
" │ └─ExchangeSender_14 10000.00 mpp[tiflash] ExchangeType: Broadcast, Compression: FAST",
" │ └─TableFullScan_13 10000.00 mpp[tiflash] table:s keep order:false, stats:pseudo",
" └─TableFullScan_12(Probe) 10000.00 mpp[tiflash] table:t keep order:false, stats:pseudo"
],
"Warn": [
"[planner:1815]Some HASH_JOIN_BUILD and HASH_JOIN_PROBE hints cannot be utilized for MPP joins, please check the hints",
"[planner:1815]We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for anti semi join, please check the hint",
"[planner:1815]Some HASH_JOIN_BUILD and HASH_JOIN_PROBE hints cannot be utilized for MPP joins, please check the hints"
]
}
Expand Down
35 changes: 33 additions & 2 deletions pkg/planner/core/casetest/join/join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ func TestSemiJoinOrder(t *testing.T) {
"3 1",
"3 2",
"3 4")
tk.MustExec("set tidb_hash_join_version=optimized")
tk.MustQuery("select * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("select /*+ HASH_JOIN_BUILD(t1) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("select /*+ HASH_JOIN_BUILD(t2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("select /*+ HASH_JOIN_BUILD(t2@sel_2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN_BUILD(t1) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(testkit.Rows(
"Sort 7992.00 root test.t1.col0, test.t1.col1",
"└─HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.col0, test.t2.col0)]",
Expand All @@ -47,7 +48,8 @@ func TestSemiJoinOrder(t *testing.T) {
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t2.col0))",
" └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo"))
tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN_BUILD(t2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(testkit.Rows(
tk.MustQuery("show warnings").Check(testkit.Rows())
tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN_BUILD(t2@sel_2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(testkit.Rows(
"Sort 7992.00 root test.t1.col0, test.t1.col1",
"└─HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.col0, test.t2.col0)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
Expand All @@ -56,4 +58,33 @@ func TestSemiJoinOrder(t *testing.T) {
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.col0))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
tk.MustQuery("show warnings").Check(testkit.Rows())
tk.MustExec("set tidb_hash_join_version=legacy")
tk.MustQuery("select * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("select /*+ HASH_JOIN_BUILD(t1) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("select /*+ HASH_JOIN_BUILD(t2@sel_2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(result)
tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN_BUILD(t1) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(testkit.Rows(
"Sort 7992.00 root test.t1.col0, test.t1.col1",
"└─HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.col0, test.t2.col0)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.col0))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.col0))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
tk.MustQuery("show warnings").Check(testkit.Rows(
"Warning 1815 We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for semi join, please check the hint",
"Warning 1815 We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for semi join, please check the hint"))
tk.MustQuery("explain format = 'brief' select /*+ HASH_JOIN_BUILD(t2@sel_2) */ * from t1 where exists (select 1 from t2 where t1.col0 = t2.col0) order by t1.col0, t1.col1;").Check(testkit.Rows(
"Sort 7992.00 root test.t1.col0, test.t1.col1",
"└─HashJoin 7992.00 root semi join, left side:TableReader, equal:[eq(test.t1.col0, test.t2.col0)]",
" ├─TableReader(Build) 9990.00 root data:Selection",
" │ └─Selection 9990.00 cop[tikv] not(isnull(test.t2.col0))",
" │ └─TableFullScan 10000.00 cop[tikv] table:t2 keep order:false, stats:pseudo",
" └─TableReader(Probe) 9990.00 root data:Selection",
" └─Selection 9990.00 cop[tikv] not(isnull(test.t1.col0))",
" └─TableFullScan 10000.00 cop[tikv] table:t1 keep order:false, stats:pseudo"))
tk.MustQuery("show warnings").Check(testkit.Rows(
"Warning 1815 We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for semi join, please check the hint",
"Warning 1815 We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for semi join, please check the hint"))
}
74 changes: 68 additions & 6 deletions pkg/planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/executor/join/joinversion"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/expression/aggregation"
"github.com/pingcap/tidb/pkg/kv"
Expand Down Expand Up @@ -371,6 +372,56 @@ func shouldSkipHashJoin(p *logicalop.LogicalJoin) bool {
return (p.PreferJoinType&h.PreferNoHashJoin) > 0 || (p.SCtx().GetSessionVars().DisableHashJoin)
}

// isGAForHashJoinV2 reports whether the described join counts as GA for hash join v2.
// It returns true only for inner, left outer and right outer joins that have at least
// one left join key, no null-aware join keys and no null-equal condition.
func isGAForHashJoinV2(joinType logicalop.JoinType, leftJoinKeys []*expression.Column, isNullEQ []bool, leftNAJoinKeys []*expression.Column) bool {
	// Null-aware joins and cross joins (no join keys at all) are excluded.
	if len(leftNAJoinKeys) > 0 || len(leftJoinKeys) == 0 {
		return false
	}
	// A single null-equal condition is enough to exclude the join.
	for _, nullEQ := range isNullEQ {
		if nullEQ {
			return false
		}
	}
	return joinType == logicalop.LeftOuterJoin ||
		joinType == logicalop.RightOuterJoin ||
		joinType == logicalop.InnerJoin
}

// canUseHashJoinV2 reports whether the described join is supported by hash join v2.
// A join that is not GA for hash join v2 is accepted only when
// joinversion.UseHashJoinV2ForNonGAJoin is set.
func canUseHashJoinV2(joinType logicalop.JoinType, leftJoinKeys []*expression.Column, isNullEQ []bool, leftNAJoinKeys []*expression.Column) bool {
	ga := isGAForHashJoinV2(joinType, leftJoinKeys, isNullEQ, leftNAJoinKeys)
	if !ga && !joinversion.UseHashJoinV2ForNonGAJoin {
		return false
	}

	// Only the join types listed here are candidates; everything else is rejected.
	// NOTE(review): AntiLeftOuterSemiJoin is not in this list — confirm that is intentional.
	switch joinType {
	case logicalop.LeftOuterJoin, logicalop.RightOuterJoin, logicalop.InnerJoin, logicalop.LeftOuterSemiJoin, logicalop.SemiJoin, logicalop.AntiSemiJoin:
	default:
		return false
	}

	// Null-aware joins are not supported yet, and cross joins (no join keys) are not supported.
	if len(leftNAJoinKeys) > 0 || len(leftJoinKeys) == 0 {
		return false
	}
	// NullEQ is not supported yet.
	for _, nullEQ := range isNullEQ {
		if nullEQ {
			return false
		}
	}
	return true
}

func getHashJoins(p *logicalop.LogicalJoin, prop *property.PhysicalProperty) (joins []base.PhysicalPlan, forced bool) {
if !prop.IsSortItemEmpty() { // hash join doesn't promise any orders
return
Expand All @@ -383,17 +434,28 @@ func getHashJoins(p *logicalop.LogicalJoin, prop *property.PhysicalProperty) (jo
forceLeftToBuild = false
forceRightToBuild = false
}

joins = make([]base.PhysicalPlan, 0, 2)
switch p.JoinType {
case logicalop.SemiJoin, logicalop.AntiSemiJoin:
if !forceLeftToBuild {
joins = append(joins, getHashJoin(p, prop, 1, false))
} else if !forceRightToBuild {
joins = append(joins, getHashJoin(p, prop, 1, true))
leftJoinKeys, _, isNullEQ, _ := p.GetJoinKeys()
leftNAJoinKeys, _ := p.GetNAJoinKeys()
if p.SCtx().GetSessionVars().UseHashJoinV2 && joinversion.IsHashJoinV2Supported() && canUseHashJoinV2(p.JoinType, leftJoinKeys, isNullEQ, leftNAJoinKeys) {
if !forceLeftToBuild {
joins = append(joins, getHashJoin(p, prop, 1, false))
} else if !forceRightToBuild {
joins = append(joins, getHashJoin(p, prop, 1, true))
} else {
joins = append(joins, getHashJoin(p, prop, 1, false))
joins = append(joins, getHashJoin(p, prop, 1, true))
}
} else {
joins = append(joins, getHashJoin(p, prop, 1, false))
joins = append(joins, getHashJoin(p, prop, 1, true))
if forceLeftToBuild || forceRightToBuild {
// Do not support specifying the build and probe side for semi join.
p.SCtx().GetSessionVars().StmtCtx.SetHintWarning(fmt.Sprintf("We can't use the HASH_JOIN_BUILD or HASH_JOIN_PROBE hint for %s, please check the hint", p.JoinType))
forceLeftToBuild = false
forceRightToBuild = false
}
}
case logicalop.LeftOuterSemiJoin, logicalop.AntiLeftOuterSemiJoin:
joins = append(joins, getHashJoin(p, prop, 1, false))
Expand Down
48 changes: 1 addition & 47 deletions pkg/planner/core/physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"unsafe"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/executor/join/joinversion"
"github.com/pingcap/tidb/pkg/expression"
"github.com/pingcap/tidb/pkg/expression/aggregation"
"github.com/pingcap/tidb/pkg/kv"
Expand Down Expand Up @@ -1490,54 +1489,9 @@ type PhysicalHashJoin struct {
runtimeFilterList []*RuntimeFilter `plan-cache-clone:"must-nil"` // plan with runtime filter is not cached
}

// isGAForHashJoinV2 reports whether this join counts as GA for hash join v2: it must be an
// inner, left outer or right outer join with at least one left join key, no null-aware
// join keys and no null-equal condition.
func (p *PhysicalHashJoin) isGAForHashJoinV2() bool {
// null-aware join
if len(p.LeftNAJoinKeys) > 0 {
return false
}
// cross join (no join keys)
if len(p.LeftJoinKeys) == 0 {
return false
}
// join with a null-equal condition
for _, value := range p.IsNullEQ {
if value {
return false
}
}
// only these join types are GA; every other join type is not
switch p.JoinType {
case logicalop.LeftOuterJoin, logicalop.RightOuterJoin, logicalop.InnerJoin:
return true
default:
return false
}
}

// CanUseHashJoinV2 returns true if current join is supported by hash join v2
hawkingrei marked this conversation as resolved.
Show resolved Hide resolved
func (p *PhysicalHashJoin) CanUseHashJoinV2() bool {
if !p.isGAForHashJoinV2() && !joinversion.UseHashJoinV2ForNonGAJoin {
return false
}
switch p.JoinType {
case logicalop.LeftOuterJoin, logicalop.RightOuterJoin, logicalop.InnerJoin, logicalop.LeftOuterSemiJoin, logicalop.SemiJoin, logicalop.AntiSemiJoin, logicalop.AntiLeftOuterSemiJoin:
// null aware join is not supported yet
if len(p.LeftNAJoinKeys) > 0 {
return false
}
// cross join is not supported
if len(p.LeftJoinKeys) == 0 {
return false
}
// NullEQ is not supported yet
for _, value := range p.IsNullEQ {
if value {
return false
}
}
return true
default:
return false
}
return canUseHashJoinV2(p.JoinType, p.LeftJoinKeys, p.IsNullEQ, p.LeftNAJoinKeys)
}

// Clone implements op.PhysicalPlan interface.
Expand Down