Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: update the correlation adjustment rule of Limit/TopN for TableScan (#26445) #26653

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions executor/set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ func (s *testSerialSuite1) TestSetVar(c *C) {
tk.MustExec("set @@tidb_enable_clustered_index = 'int_only'")
tk.MustQuery(`show warnings`).Check(testkit.Rows("Warning 1287 'INT_ONLY' is deprecated and will be removed in a future release. Please use 'ON' or 'OFF' instead"))

<<<<<<< HEAD
// test for tidb_enable_stable_result_mode
tk.MustQuery(`select @@tidb_enable_stable_result_mode`).Check(testkit.Rows("0"))
tk.MustExec(`set global tidb_enable_stable_result_mode = 1`)
Expand All @@ -557,6 +558,37 @@ func (s *testSerialSuite1) TestSetVar(c *C) {
tk.MustExec(`set tidb_enable_stable_result_mode=1`)
tk.MustQuery(`select @@global.tidb_enable_stable_result_mode`).Check(testkit.Rows("0"))
tk.MustQuery(`select @@tidb_enable_stable_result_mode`).Check(testkit.Rows("1"))
=======
// test for tidb_enable_ordered_result_mode
tk.MustQuery(`select @@tidb_enable_ordered_result_mode`).Check(testkit.Rows("0"))
tk.MustExec(`set global tidb_enable_ordered_result_mode = 1`)
tk.MustQuery(`select @@global.tidb_enable_ordered_result_mode`).Check(testkit.Rows("1"))
tk.MustExec(`set global tidb_enable_ordered_result_mode = 0`)
tk.MustQuery(`select @@global.tidb_enable_ordered_result_mode`).Check(testkit.Rows("0"))
tk.MustExec(`set tidb_enable_ordered_result_mode=1`)
tk.MustQuery(`select @@global.tidb_enable_ordered_result_mode`).Check(testkit.Rows("0"))
tk.MustQuery(`select @@tidb_enable_ordered_result_mode`).Check(testkit.Rows("1"))

// test for tidb_opt_enable_correlation_adjustment
tk.MustQuery(`select @@tidb_opt_enable_correlation_adjustment`).Check(testkit.Rows("1"))
tk.MustExec(`set global tidb_opt_enable_correlation_adjustment = 0`)
tk.MustQuery(`select @@global.tidb_opt_enable_correlation_adjustment`).Check(testkit.Rows("0"))
tk.MustExec(`set global tidb_opt_enable_correlation_adjustment = 1`)
tk.MustQuery(`select @@global.tidb_opt_enable_correlation_adjustment`).Check(testkit.Rows("1"))
tk.MustExec(`set tidb_opt_enable_correlation_adjustment=0`)
tk.MustQuery(`select @@global.tidb_opt_enable_correlation_adjustment`).Check(testkit.Rows("1"))
tk.MustQuery(`select @@tidb_opt_enable_correlation_adjustment`).Check(testkit.Rows("0"))

// test for tidb_opt_limit_push_down_threshold
tk.MustQuery(`select @@tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("100"))
tk.MustExec(`set global tidb_opt_limit_push_down_threshold = 20`)
tk.MustQuery(`select @@global.tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("20"))
tk.MustExec(`set global tidb_opt_limit_push_down_threshold = 100`)
tk.MustQuery(`select @@global.tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("100"))
tk.MustExec(`set tidb_opt_limit_push_down_threshold = 20`)
tk.MustQuery(`select @@global.tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("100"))
tk.MustQuery(`select @@tidb_opt_limit_push_down_threshold`).Check(testkit.Rows("20"))
>>>>>>> 51c48d2fa... planner: update the correlation adjustment rule of Limit/TopN for TableScan (#26445)
}

func (s *testSuite5) TestTruncateIncorrectIntSessionVar(c *C) {
Expand Down
16 changes: 10 additions & 6 deletions planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,7 @@ func convertRangeFromExpectedCnt(ranges []*ranger.Range, rangeCounts []float64,
// if order of column `a` is strictly correlated with column `pk`, the row count of table scan should be:
// `1 + row_count(a < 1 or a is null)`
func (ds *DataSource) crossEstimateTableRowCount(path *util.AccessPath, expectedCnt float64, desc bool) (float64, bool, float64) {
if ds.statisticTable.Pseudo || len(path.TableFilters) == 0 {
if ds.statisticTable.Pseudo || len(path.TableFilters) == 0 || !ds.ctx.GetSessionVars().EnableCorrelationAdjustment {
return 0, false, 0
}
col, corr := getMostCorrCol4Handle(path.TableFilters, ds.statisticTable, ds.ctx.GetSessionVars().CorrelationThreshold)
Expand Down Expand Up @@ -1408,7 +1408,7 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, conds []expre
func (ds *DataSource) crossEstimateIndexRowCount(path *util.AccessPath, expectedCnt float64, desc bool) (float64, bool, float64) {
filtersLen := len(path.TableFilters) + len(path.IndexFilters)
sessVars := ds.ctx.GetSessionVars()
if ds.statisticTable.Pseudo || filtersLen == 0 || !sessVars.EnableExtendedStats {
if ds.statisticTable.Pseudo || filtersLen == 0 || !sessVars.EnableExtendedStats || !ds.ctx.GetSessionVars().EnableCorrelationAdjustment {
return 0, false, 0
}
col, corr := getMostCorrCol4Index(path, ds.statisticTable, sessVars.CorrelationThreshold)
Expand Down Expand Up @@ -1809,7 +1809,10 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
}
rowCount := path.CountAfterAccess
if prop.ExpectedCnt < ds.stats.RowCount {
count, ok, corr := ds.crossEstimateTableRowCount(path, prop.ExpectedCnt, isMatchProp && prop.SortItems[0].Desc)
selectivity := ds.stats.RowCount / path.CountAfterAccess
uniformEst := math.Min(path.CountAfterAccess, prop.ExpectedCnt/selectivity)

corrEst, ok, corr := ds.crossEstimateTableRowCount(path, prop.ExpectedCnt, isMatchProp && prop.SortItems[0].Desc)
if ok {
// TODO: actually, before using this count as the estimated row count of table scan, we need additionally
// check if count < row_count(first_region | last_region), and use the larger one since we build one copTask
Expand All @@ -1818,11 +1821,12 @@ func (ds *DataSource) getOriginalPhysicalTableScan(prop *property.PhysicalProper
// to get the row count in a region, but that result contains MVCC old version rows, so it is not that accurate.
// Considering that when this scenario happens, the execution time is close between IndexScan and TableScan,
// we do not add this check temporarily.
rowCount = count

// to reduce risks of correlation adjustment, use the maximum between uniformEst and corrEst
rowCount = math.Max(uniformEst, corrEst)
} else if abs := math.Abs(corr); abs < 1 {
correlationFactor := math.Pow(1-abs, float64(ds.ctx.GetSessionVars().CorrelationExpFactor))
selectivity := ds.stats.RowCount / rowCount
rowCount = math.Min(prop.ExpectedCnt/selectivity/correlationFactor, rowCount)
rowCount = math.Min(path.CountAfterAccess, uniformEst/correlationFactor)
}
}
// We need NDV of columns since it may be used in cost estimation of join. Precisely speaking,
Expand Down
135 changes: 135 additions & 0 deletions planner/core/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3533,7 +3533,142 @@ func (s *testIntegrationSerialSuite) TestMergeContinuousSelections(c *C) {
}
}

<<<<<<< HEAD
func (s *testIntegrationSuite) TestIssue23839(c *C) {
=======
// TestSelectIgnoreTemporaryTableInView verifies that views defined on ordinary
// tables keep returning the ordinary tables' rows after temporary tables with
// the same names (t1, t2) have been created in the session.
func (s *testIntegrationSerialSuite) TestSelectIgnoreTemporaryTableInView(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	tk.MustExec("use test")

	rootUser := &auth.UserIdentity{Username: "root", Hostname: "localhost", CurrentUser: true, AuthUsername: "root", AuthHostname: "%"}
	tk.Se.Auth(rootUser, nil, []byte("012345678901234567890"))

	// Executed strictly in order: base tables, the views over them, the rows the
	// views are expected to expose, and finally the same-named temporary tables.
	setupStmts := []string{
		"set @@tidb_enable_noop_functions=1",
		"create table t1 (a int, b int)",
		"create table t2 (c int, d int)",
		"create view v1 as select * from t1 order by a",
		"create view v2 as select * from ((select * from t1) union (select * from t2)) as tt order by a, b",
		"create view v3 as select * from v1 order by a",
		"create view v4 as select * from t1, t2 where t1.a = t2.c order by a, b",
		"create view v5 as select * from (select * from t1) as t1 order by a",
		"insert into t1 values (1, 2), (3, 4)",
		"insert into t2 values (3, 5), (6, 7)",
		"create temporary table t1 (a int, b int)",
		"create temporary table t2 (c int, d int)",
	}
	for _, stmt := range setupStmts {
		tk.MustExec(stmt)
	}

	// Each query is paired with the rows it must return. Selecting from t1/t2
	// directly yields no rows, while every view still sees the inserted data.
	expectations := []struct {
		sql  string
		rows []string
	}{
		{"select * from t1", nil},
		{"select * from t2", nil},
		{"select * from v1", []string{"1 2", "3 4"}},
		{"select * from v2", []string{"1 2", "3 4", "3 5", "6 7"}},
		{"select * from v3", []string{"1 2", "3 4"}},
		{"select * from v4", []string{"3 4 3 5"}},
		{"select * from v5", []string{"1 2", "3 4"}},
	}
	for _, want := range expectations {
		tk.MustQuery(want.sql).Check(testkit.Rows(want.rows...))
	}
}

// TestIssue26250 joins a range-partitioned table with a non-partitioned table
// on their primary keys under `for update` and checks that the single matching
// row is returned.
func (s *testIntegrationSerialSuite) TestIssue26250(c *C) {
	tk := testkit.NewTestKit(c, s.store)

	// Schema and data, executed in order.
	prepare := []string{
		"use test",
		"create table tp (id int primary key) partition by range (id) (partition p0 values less than (100));",
		"create table tn (id int primary key);",
		"insert into tp values(1),(2);",
		"insert into tn values(1),(2);",
	}
	for _, stmt := range prepare {
		tk.MustExec(stmt)
	}

	result := tk.MustQuery("select * from tp,tn where tp.id=tn.id and tn.id=1 for update;")
	result.Check(testkit.Rows("1 1"))
}

// TestCorrelationAdjustment4Limit checks the estimated row count (estRows) that
// `explain` reports for TableFullScan under a `limit 1` query, with the session
// variable tidb_opt_enable_correlation_adjustment switched off and on.
func (s *testIntegrationSuite) TestCorrelationAdjustment4Limit(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (pk int primary key auto_increment, year int, c varchar(256), index idx_year(year))")

// insertWithYear inserts n rows carrying the given year, one insert statement per row,
// so the auto-increment pk grows in the same order as the calls are made.
insertWithYear := func(n, year int) {
for i := 0; i < n; i++ {
tk.MustExec(fmt.Sprintf("insert into t (year, c) values (%v, space(256))", year))
}
}
// 30 rows in total: years 2000, 2001 and 2002 are inserted one after another, 10 rows each.
insertWithYear(10, 2000)
insertWithYear(10, 2001)
insertWithYear(10, 2002)
tk.MustExec("analyze table t")

// case 1: correlation adjustment disabled.
tk.MustExec("set @@tidb_opt_enable_correlation_adjustment = false")
// The estRows of TableFullScan is under-estimated here: the scan has to pass through the rows of
// 2000 and 2001 before reaching 2002, but the uniform-assumption formula (LimitNum / Selectivity)
// cannot take this into account.
tk.MustQuery("explain format=brief select * from t use index(primary) where year=2002 limit 1").Check(testkit.Rows(
"Limit 1.00 root offset:0, count:1",
"└─TableReader 1.00 root data:Limit",
" └─Limit 1.00 cop[tikv] offset:0, count:1",
" └─Selection 1.00 cop[tikv] eq(test.t.year, 2002)",
" └─TableFullScan 3.00 cop[tikv] table:t keep order:false"))

// case 2: with correlation adjustment enabled, the rows that precede year=2002 are accounted for.
tk.MustExec("set @@tidb_opt_enable_correlation_adjustment = true")
tk.MustQuery("explain format=brief select * from t use index(primary) where year=2002 limit 1").Check(testkit.Rows(
"Limit 1.00 root offset:0, count:1",
"└─TableReader 1.00 root data:Limit",
" └─Limit 1.00 cop[tikv] offset:0, count:1",
" └─Selection 1.00 cop[tikv] eq(test.t.year, 2002)",
" └─TableFullScan 21.00 cop[tikv] table:t keep order:false"))

// Rebuild the table with 2 rows for each year from 2000 to 2050 and refresh the statistics.
tk.MustExec("truncate table t")
for y := 2000; y <= 2050; y++ {
insertWithYear(2, y)
}
tk.MustExec("analyze table t")

// case 3: correlation adjustment may only raise the estimate, so estRows = max(1/selectivity, adjustedCount).
// 1/sel = 1/(1/NDV) is around 50, while adjustedCount is 1 because the very first row already satisfies `year=2000`.
// The estRows is over-estimated in this case, but that is the safer choice: it avoids incorrectly converting
// an IndexScan into a TableScan in some cases.
tk.MustQuery("explain format=brief select * from t use index(primary) where year=2000 limit 1").Check(testkit.Rows(
"Limit 1.00 root offset:0, count:1",
"└─TableReader 1.00 root data:Limit",
" └─Limit 1.00 cop[tikv] offset:0, count:1",
" └─Selection 1.00 cop[tikv] eq(test.t.year, 2000)",
" └─TableFullScan 51.00 cop[tikv] table:t keep order:false"))
}

// TestCTESelfJoin runs a query in which one CTE (`inv`, an aggregate over a
// three-table join) is referenced twice and joined with itself. The statement
// is only executed via MustExec; no result rows are checked, so the test
// passes as long as the query runs without error.
func (s *testIntegrationSerialSuite) TestCTESelfJoin(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t1, t2, t3")
tk.MustExec("create table t1(t1a int, t1b int, t1c int)")
tk.MustExec("create table t2(t2a int, t2b int, t2c int)")
tk.MustExec("create table t3(t3a int, t3b int, t3c int)")
// The tables are left empty: only successful execution of the statement is verified.
tk.MustExec(`
with inv as
(select t1a , t3a, sum(t2c)
from t1, t2, t3
where t2a = t1a
and t2b = t3b
and t3c = 1998
group by t1a, t3a)
select inv1.t1a, inv2.t3a
from inv inv1, inv inv2
where inv1.t1a = inv2.t1a
and inv1.t3a = 4
and inv2.t3a = 4+1`)
}

// https://github.com/pingcap/tidb/issues/26214
//
// TestIssue26214 creates a three-column table that carries an expression
// index, then runs a query filtering on that same expression with
// `order by 4`, and asserts that the query fails with ErrUnknownColumn.
func (s *testIntegrationSerialSuite) TestIssue26214(c *C) {
	// Enable the experimental expression-index switch for the duration of the
	// test; the deferred function writes the previous value back on exit.
	previous := config.GetGlobalConfig().Experimental.AllowsExpressionIndex
	defer func() {
		config.GetGlobalConfig().Experimental.AllowsExpressionIndex = previous
	}()
	config.GetGlobalConfig().Experimental.AllowsExpressionIndex = true

	tk := testkit.NewTestKit(c, s.store)
	for _, stmt := range []string{
		"use test",
		"drop table if exists t",
		"create table `t` (`a` int(11) default null, `b` int(11) default null, `c` int(11) default null, key `expression_index` ((case when `a` < 0 then 1 else 2 end)))",
	} {
		tk.MustExec(stmt)
	}

	_, queryErr := tk.Exec("select * from t where case when a < 0 then 1 else 2 end <= 1 order by 4;")
	isUnknownColumn := core.ErrUnknownColumn.Equal(queryErr)
	c.Assert(isUnknownColumn, IsTrue)
}

func (s *testIntegrationSerialSuite) TestLimitPushDown(c *C) {
>>>>>>> 51c48d2fa... planner: update the correlation adjustment rule of Limit/TopN for TableScan (#26445)
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists BB")
Expand Down
8 changes: 8 additions & 0 deletions sessionctx/variable/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,9 @@ type SessionVars struct {
// CorrelationThreshold is the guard to enable row count estimation using column order correlation.
CorrelationThreshold float64

// EnableCorrelationAdjustment is used to indicate if correlation adjustment is enabled.
EnableCorrelationAdjustment bool

// CorrelationExpFactor is used to control the heuristic approach of row count estimation when CorrelationThreshold is not met.
CorrelationExpFactor int

Expand Down Expand Up @@ -1002,6 +1005,11 @@ func NewSessionVars() *SessionVars {
DDLReorgPriority: kv.PriorityLow,
allowInSubqToJoinAndAgg: DefOptInSubqToJoinAndAgg,
preferRangeScan: DefOptPreferRangeScan,
<<<<<<< HEAD
=======
EnableCorrelationAdjustment: DefOptEnableCorrelationAdjustment,
LimitPushDownThreshold: DefOptLimitPushDownThreshold,
>>>>>>> 51c48d2fa... planner: update the correlation adjustment rule of Limit/TopN for TableScan (#26445)
CorrelationThreshold: DefOptCorrelationThreshold,
CorrelationExpFactor: DefOptCorrelationExpFactor,
CPUFactor: DefOptCPUFactor,
Expand Down
Loading