-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
statistics: test the auto analyze and feedback for the global-level stats #23181
Changes from 10 commits
167723c
d866ce2
fd5adaf
12e48d4
d863fc1
ad341bc
80711d4
e8c97e5
5c48a82
01f9e7f
405debd
f914859
9fb318d
4027f71
787c204
f7e22a1
5744f8c
d8e609c
e50fbdc
41d7447
ba07e13
d7a6a08
e099d1f
6eb6a9d
db272c6
ad11f33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1601,6 +1601,104 @@ func (s *statsSerialSuite) TestGCIndexUsageInformation(c *C) { | |
tk.MustQuery(querySQL).Check(testkit.Rows("0")) | ||
} | ||
|
||
func (s *testStatsSuite) TestFeedbackWithGlobalStats(c *C) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use |
||
defer cleanEnv(c, s.store, s.do) | ||
testKit := testkit.NewTestKit(c, s.store) | ||
testKit.MustExec("use test") | ||
testKit.MustExec("set @@tidb_analyze_version = 1") | ||
|
||
oriProbability := statistics.FeedbackProbability.Load() | ||
oriNumber := statistics.MaxNumberOfRanges | ||
oriMinLogCount := handle.MinLogScanCount | ||
oriErrorRate := handle.MinLogErrorRate | ||
defer func() { | ||
statistics.FeedbackProbability.Store(oriProbability) | ||
statistics.MaxNumberOfRanges = oriNumber | ||
handle.MinLogScanCount = oriMinLogCount | ||
handle.MinLogErrorRate = oriErrorRate | ||
}() | ||
// Case 1: You can't set tidb_analyze_version to 2 if feedback is enabled. | ||
// Note: to set @@tidb_partition_prune_mode = 'dynamic', we must first set tidb_analyze_version to 2. We have already tested this. | ||
statistics.FeedbackProbability.Store(1) | ||
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1")) | ||
testKit.MustExec("set @@tidb_analyze_version = 2") | ||
testKit.MustQuery("show warnings").Check(testkit.Rows(`Error 1105 variable tidb_analyze_version not updated because analyze version 2 is incompatible with query feedback. Please consider setting feedback-probability to 0.0 in config file to disable query feedback`)) | ||
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1")) | ||
|
||
// Case 2: Feedback wouldn't be applied on version 2 and global-level statistics. | ||
statistics.FeedbackProbability.Store(0) | ||
testKit.MustExec("set @@tidb_analyze_version = 2") | ||
testKit.MustExec("set @@tidb_partition_prune_mode = 'dynamic';") | ||
testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("2")) | ||
testKit.MustExec("create table t (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2;") | ||
for i := 0; i < 200; i++ { | ||
testKit.MustExec("insert into t values (1,2),(2,2),(4,5),(2,3),(3,4)") | ||
} | ||
testKit.MustExec("analyze table t with 0 topn") | ||
h := s.do.StatsHandle() | ||
is := s.do.InfoSchema() | ||
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) | ||
c.Assert(err, IsNil) | ||
tblInfo := table.Meta() | ||
testKit.MustExec("analyze table t") | ||
err = h.Update(s.do.InfoSchema()) | ||
c.Assert(err, IsNil) | ||
statsTblBefore := h.GetTableStats(tblInfo) | ||
statistics.FeedbackProbability.Store(1) | ||
// make the statistics inaccurate. | ||
for i := 0; i < 200; i++ { | ||
testKit.MustExec("insert into t values (3,4), (3,4), (3,4), (3,4), (3,4)") | ||
} | ||
// trigger feedback | ||
testKit.MustExec("select b from t partition(p0) use index(idx) where t.b <= 3;") | ||
testKit.MustExec("select b from t partition(p1) use index(idx) where t.b <= 3;") | ||
testKit.MustExec("select b from t use index(idx) where t.b <= 3 order by b;") | ||
testKit.MustExec("select b from t use index(idx) where t.b <= 3;") | ||
|
||
h.UpdateStatsByLocalFeedback(s.do.InfoSchema()) | ||
err = h.DumpStatsFeedbackToKV() | ||
c.Assert(err, IsNil) | ||
err = h.HandleUpdateStats(s.do.InfoSchema()) | ||
c.Assert(err, IsNil) | ||
statsTblAfter := h.GetTableStats(tblInfo) | ||
// Assert that the statistics have not changed: | ||
// feedback does not work for a partitioned table in dynamic mode. | ||
assertTableEqual(c, statsTblBefore, statsTblAfter) | ||
|
||
// Case 3: Feedback is also not effective on version 1 and partition-level statistics. | ||
testKit.MustExec("set tidb_analyze_version = 1") | ||
testKit.MustExec("set @@tidb_partition_prune_mode = 'static';") | ||
testKit.MustExec("create table t1 (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2") | ||
for i := 0; i < 200; i++ { | ||
testKit.MustExec("insert into t1 values (1,2),(2,2),(4,5),(2,3),(3,4)") | ||
} | ||
testKit.MustExec("analyze table t1 with 0 topn") | ||
// make the statistics inaccurate. | ||
for i := 0; i < 200; i++ { | ||
testKit.MustExec("insert into t1 values (3,4), (3,4), (3,4), (3,4), (3,4)") | ||
} | ||
is = s.do.InfoSchema() | ||
table, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t1")) | ||
c.Assert(err, IsNil) | ||
tblInfo = table.Meta() | ||
statsTblBefore = h.GetTableStats(tblInfo) | ||
// trigger feedback | ||
testKit.MustExec("select b from t partition(p0) use index(idx) where t.b <= 3;") | ||
testKit.MustExec("select b from t partition(p1) use index(idx) where t.b <= 3;") | ||
testKit.MustExec("select b from t use index(idx) where t.b <= 3 order by b;") | ||
testKit.MustExec("select b from t use index(idx) where t.b <= 3;") | ||
|
||
h.UpdateStatsByLocalFeedback(s.do.InfoSchema()) | ||
err = h.DumpStatsFeedbackToKV() | ||
c.Assert(err, IsNil) | ||
err = h.HandleUpdateStats(s.do.InfoSchema()) | ||
c.Assert(err, IsNil) | ||
statsTblAfter = h.GetTableStats(tblInfo) | ||
// Assert that the statistics have not changed: | ||
// feedback does not work for a partitioned table in static mode. | ||
assertTableEqual(c, statsTblBefore, statsTblAfter) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about creating a local lambda function for this logic like |
||
} | ||
|
||
func (s *testStatsSuite) TestExtendedStatsPartitionTable(c *C) { | ||
defer cleanEnv(c, s.store, s.do) | ||
tk := testkit.NewTestKit(c, s.store) | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -808,8 +808,8 @@ func (s *testStatsSuite) TestUpdatePartitionErrorRate(c *C) { | |||||
c.Assert(h.Update(is), IsNil) | ||||||
tbl = h.GetPartitionStats(tblInfo, pid) | ||||||
|
||||||
// The error rate of this column is not larger than MaxErrorRate now. | ||||||
c.Assert(tbl.Columns[aID].NotAccurate(), IsFalse) | ||||||
// Feedback will not take effect under partition table. | ||||||
c.Assert(tbl.Columns[aID].NotAccurate(), IsTrue) | ||||||
} | ||||||
|
||||||
func appendBucket(h *statistics.Histogram, l, r int64) { | ||||||
|
@@ -1014,6 +1014,7 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) { | |||||
handle.MinLogErrorRate = 0 | ||||||
|
||||||
h := s.do.StatsHandle() | ||||||
// Feedback will not take effect under partition table. | ||||||
tests := []struct { | ||||||
sql string | ||||||
hist string | ||||||
|
@@ -1022,23 +1023,23 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) { | |||||
{ | ||||||
// test primary key feedback | ||||||
sql: "select * from t where t.a <= 5", | ||||||
hist: "column:1 ndv:2 totColSize:0\n" + | ||||||
"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" + | ||||||
"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0", | ||||||
hist: "column:1 ndv:2 totColSize:2\n" + | ||||||
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n" + | ||||||
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0", | ||||||
idxCols: 0, | ||||||
}, | ||||||
{ | ||||||
// test index feedback by double read | ||||||
sql: "select * from t use index(idx) where t.b <= 5", | ||||||
hist: "index:1 ndv:1\n" + | ||||||
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0", | ||||||
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0", | ||||||
idxCols: 1, | ||||||
}, | ||||||
{ | ||||||
// test index feedback by single read | ||||||
sql: "select b from t use index(idx) where t.b <= 5", | ||||||
hist: "index:1 ndv:1\n" + | ||||||
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0", | ||||||
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0", | ||||||
idxCols: 1, | ||||||
}, | ||||||
} | ||||||
|
@@ -1215,10 +1216,11 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) { | |||||
pid := tblInfo.Partition.Definitions[0].ID | ||||||
tbl := h.GetPartitionStats(tblInfo, pid) | ||||||
|
||||||
// Feedback will not take effect under partition table. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+ | ||||||
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+ | ||||||
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+ | ||||||
"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0") | ||||||
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0\n"+ | ||||||
"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0") | ||||||
} | ||||||
|
||||||
func (s *testStatsSuite) TestFeedbackWithStatsVer2(c *C) { | ||||||
|
@@ -2085,6 +2087,7 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) { | |||||
testKit := testkit.NewTestKit(c, s.store) | ||||||
testkit.WithPruneMode(testKit, variable.DynamicOnly, func() { | ||||||
testKit.MustExec("use test") | ||||||
testKit.MustExec("set @@tidb_analyze_version = 2;") | ||||||
testKit.MustExec("drop table if exists t") | ||||||
testKit.MustExec(`create table t (a int, b varchar(10), index idx_ab(a, b)) | ||||||
partition by range (a) ( | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please also add this check to
Handle.UpdateStatsByLocalFeedback
, which is where TiDB uses feedback to update its statistics? Doing so would be safer and would completely remove the impact of feedback on global stats.