Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: test the auto analyze and feedback for the global-level stats #23181

Merged
merged 26 commits into from
Mar 10, 2021
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
167723c
add test cases for auto analyze and feedback
Reminiscent Mar 8, 2021
d866ce2
fix test
Reminiscent Mar 8, 2021
fd5adaf
statistics: test the auto analyze and feedback for the global-level s…
Reminiscent Mar 9, 2021
12e48d4
Merge branch 'master' of https://github.com/pingcap/tidb into testGlo…
Reminiscent Mar 9, 2021
d863fc1
fix ut and refactor the code
Reminiscent Mar 9, 2021
ad341bc
forbid the feedback for the partition table
Reminiscent Mar 9, 2021
80711d4
change the comment
Reminiscent Mar 9, 2021
e8c97e5
update the test
Reminiscent Mar 9, 2021
5c48a82
Merge branch 'master' of https://github.com/pingcap/tidb into testGlo…
Reminiscent Mar 9, 2021
01f9e7f
Merge branch 'master' into testGlobalHist4MutilColumns
Reminiscent Mar 10, 2021
405debd
Merge branch 'master' of https://github.com/pingcap/tidb into testGlo…
Reminiscent Mar 10, 2021
f914859
address comments
Reminiscent Mar 10, 2021
9fb318d
Merge remote-tracking branch 'origin/testGlobalHist4MutilColumns' int…
Reminiscent Mar 10, 2021
4027f71
address comments
Reminiscent Mar 10, 2021
787c204
Merge branch 'master' into testGlobalHist4MutilColumns
qw4990 Mar 10, 2021
f7e22a1
address comments
Reminiscent Mar 10, 2021
5744f8c
Merge remote-tracking branch 'origin/testGlobalHist4MutilColumns' int…
Reminiscent Mar 10, 2021
d8e609c
address comments
Reminiscent Mar 10, 2021
e50fbdc
Merge branch 'master' of https://github.com/pingcap/tidb into testGlo…
Reminiscent Mar 10, 2021
41d7447
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
ba07e13
Merge branch 'master' into testGlobalHist4MutilColumns
qw4990 Mar 10, 2021
d7a6a08
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
e099d1f
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
6eb6a9d
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
db272c6
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
ad11f33
Merge branch 'master' into testGlobalHist4MutilColumns
ti-chi-bot Mar 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2559,6 +2559,10 @@ func buildNoRangeTableReader(b *executorBuilder, v *plannercore.PhysicalTableRea
e.feedback = statistics.NewQueryFeedback(getFeedbackStatsTableID(e.ctx, tbl), ts.Hist, int64(ts.StatsCount()), ts.Desc)
}
collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(ts.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collect && tbl.Meta().Partition != nil {
collect = false
}
Comment on lines +2562 to +2565
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please also add this check to Handle.UpdateStatsByLocalFeedback, where TiDB uses feedback to update its statistics? That would be safer and would completely remove the impact of feedback on global stats.

if !collect {
e.feedback.Invalidate()
}
Expand Down Expand Up @@ -2830,6 +2834,10 @@ func buildNoRangeIndexReader(b *executorBuilder, v *plannercore.PhysicalIndexRea
e.feedback = statistics.NewQueryFeedback(tblID, is.Hist, int64(is.StatsCount()), is.Desc)
}
collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collect && tbl.Meta().Partition != nil {
collect = false
}
if !collect {
e.feedback.Invalidate()
}
Expand Down Expand Up @@ -2968,6 +2976,10 @@ func buildNoRangeIndexLookUpReader(b *executorBuilder, v *plannercore.PhysicalIn
collectTable := false
e.tableRequest.CollectRangeCounts = &collectTable
collectIndex := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collectIndex && tbl.Meta().Partition != nil {
collectIndex = false
}
if !collectIndex {
e.feedback.Invalidate()
}
Expand Down
98 changes: 98 additions & 0 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,104 @@ func (s *statsSerialSuite) TestGCIndexUsageInformation(c *C) {
tk.MustQuery(querySQL).Check(testkit.Rows("0"))
}

func (s *testStatsSuite) TestFeedbackWithGlobalStats(c *C) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use statsSerialSuite here?

defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
testKit.MustExec("use test")
testKit.MustExec("set @@tidb_analyze_version = 1")

oriProbability := statistics.FeedbackProbability.Load()
oriNumber := statistics.MaxNumberOfRanges
oriMinLogCount := handle.MinLogScanCount
oriErrorRate := handle.MinLogErrorRate
defer func() {
statistics.FeedbackProbability.Store(oriProbability)
statistics.MaxNumberOfRanges = oriNumber
handle.MinLogScanCount = oriMinLogCount
handle.MinLogErrorRate = oriErrorRate
}()
// Case 1: You can't set tidb_analyze_version to 2 if feedback is enabled.
// Note: if we want to set @@tidb_partition_prune_mode = 'dynamic'. We must set tidb_analyze_version to 2 first. We have already tested this.
statistics.FeedbackProbability.Store(1)
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))
testKit.MustExec("set @@tidb_analyze_version = 2")
testKit.MustQuery("show warnings").Check(testkit.Rows(`Error 1105 variable tidb_analyze_version not updated because analyze version 2 is incompatible with query feedback. Please consider setting feedback-probability to 0.0 in config file to disable query feedback`))
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))

// Case 2: Feedback wouldn't be applied on version 2 and global-level statistics.
statistics.FeedbackProbability.Store(0)
testKit.MustExec("set @@tidb_analyze_version = 2")
testKit.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("2"))
testKit.MustExec("create table t (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2;")
for i := 0; i < 200; i++ {
testKit.MustExec("insert into t values (1,2),(2,2),(4,5),(2,3),(3,4)")
}
testKit.MustExec("analyze table t with 0 topn")
h := s.do.StatsHandle()
is := s.do.InfoSchema()
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tblInfo := table.Meta()
testKit.MustExec("analyze table t")
err = h.Update(s.do.InfoSchema())
c.Assert(err, IsNil)
statsTblBefore := h.GetTableStats(tblInfo)
statistics.FeedbackProbability.Store(1)
// make the statistics inaccurate.
for i := 0; i < 200; i++ {
testKit.MustExec("insert into t values (3,4), (3,4), (3,4), (3,4), (3,4)")
}
// trigger feedback
testKit.MustExec("select b from t partition(p0) use index(idx) where t.b <= 3;")
testKit.MustExec("select b from t partition(p1) use index(idx) where t.b <= 3;")
testKit.MustExec("select b from t use index(idx) where t.b <= 3 order by b;")
testKit.MustExec("select b from t use index(idx) where t.b <= 3;")

h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
err = h.DumpStatsFeedbackToKV()
c.Assert(err, IsNil)
err = h.HandleUpdateStats(s.do.InfoSchema())
c.Assert(err, IsNil)
statsTblAfter := h.GetTableStats(tblInfo)
// assert that statistics not changed
// the feedback can not work for the partition table in dynamic mode
assertTableEqual(c, statsTblBefore, statsTblAfter)

// Case 3: Feedback is also not effective on version 1 and partition-level statistics.
testKit.MustExec("set tidb_analyze_version = 1")
testKit.MustExec("set @@tidb_partition_prune_mode = 'static';")
testKit.MustExec("create table t1 (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2")
for i := 0; i < 200; i++ {
testKit.MustExec("insert into t1 values (1,2),(2,2),(4,5),(2,3),(3,4)")
}
testKit.MustExec("analyze table t1 with 0 topn")
// make the statistics inaccurate.
for i := 0; i < 200; i++ {
testKit.MustExec("insert into t1 values (3,4), (3,4), (3,4), (3,4), (3,4)")
}
is = s.do.InfoSchema()
table, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t1"))
c.Assert(err, IsNil)
tblInfo = table.Meta()
statsTblBefore = h.GetTableStats(tblInfo)
// trigger feedback
testKit.MustExec("select b from t partition(p0) use index(idx) where t.b <= 3;")
testKit.MustExec("select b from t partition(p1) use index(idx) where t.b <= 3;")
testKit.MustExec("select b from t use index(idx) where t.b <= 3 order by b;")
testKit.MustExec("select b from t use index(idx) where t.b <= 3;")

h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
err = h.DumpStatsFeedbackToKV()
c.Assert(err, IsNil)
err = h.HandleUpdateStats(s.do.InfoSchema())
c.Assert(err, IsNil)
statsTblAfter = h.GetTableStats(tblInfo)
// assert that statistics not changed
// the feedback can not work for the partition table in static mode
assertTableEqual(c, statsTblBefore, statsTblAfter)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about creating a local lambda function for this logic like checkFeedbackOnPartitionTable so that both case2 and case3 can use it, which makes this test simpler?

}

func (s *testStatsSuite) TestExtendedStatsPartitionTable(c *C) {
defer cleanEnv(c, s.store, s.do)
tk := testkit.NewTestKit(c, s.store)
Expand Down
21 changes: 12 additions & 9 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -808,8 +808,8 @@ func (s *testStatsSuite) TestUpdatePartitionErrorRate(c *C) {
c.Assert(h.Update(is), IsNil)
tbl = h.GetPartitionStats(tblInfo, pid)

// The error rate of this column is not larger than MaxErrorRate now.
c.Assert(tbl.Columns[aID].NotAccurate(), IsFalse)
// Feedback will not take effect under partition table.
c.Assert(tbl.Columns[aID].NotAccurate(), IsTrue)
}

func appendBucket(h *statistics.Histogram, l, r int64) {
Expand Down Expand Up @@ -1014,6 +1014,7 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
handle.MinLogErrorRate = 0

h := s.do.StatsHandle()
// Feedback will not take effect under partition table.
tests := []struct {
sql string
hist string
Expand All @@ -1022,23 +1023,23 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
{
// test primary key feedback
sql: "select * from t where t.a <= 5",
hist: "column:1 ndv:2 totColSize:0\n" +
"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" +
"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0",
hist: "column:1 ndv:2 totColSize:2\n" +
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n" +
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0",
idxCols: 0,
},
{
// test index feedback by double read
sql: "select * from t use index(idx) where t.b <= 5",
hist: "index:1 ndv:1\n" +
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
idxCols: 1,
},
{
// test index feedback by single read
sql: "select b from t use index(idx) where t.b <= 5",
hist: "index:1 ndv:1\n" +
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
idxCols: 1,
},
}
Expand Down Expand Up @@ -1215,10 +1216,11 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
pid := tblInfo.Partition.Definitions[0].ID
tbl := h.GetPartitionStats(tblInfo, pid)

// // Feedback will not take effect under partition table.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// // Feedback will not take effect under partition table.
// Feedback will not take effect under partition table.

c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0\n"+
"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0")
}

func (s *testStatsSuite) TestFeedbackWithStatsVer2(c *C) {
Expand Down Expand Up @@ -2085,6 +2087,7 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) {
testKit := testkit.NewTestKit(c, s.store)
testkit.WithPruneMode(testKit, variable.DynamicOnly, func() {
testKit.MustExec("use test")
testKit.MustExec("set @@tidb_analyze_version = 2;")
testKit.MustExec("drop table if exists t")
testKit.MustExec(`create table t (a int, b varchar(10), index idx_ab(a, b))
partition by range (a) (
Expand Down