Skip to content

Commit

Permalink
statistics: test the auto analyze and feedback for the global-level stats (#23181)
Browse files Browse the repository at this point in the history
  • Loading branch information
Reminiscent authored Mar 10, 2021
1 parent 219aea4 commit 168ef88
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 13 deletions.
6 changes: 3 additions & 3 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,8 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) {
*/

// test fast analyze in dynamic mode
tk.MustExec("set @@tidb_analyze_version = 2;")
tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
tk.MustExec("set @@session.tidb_analyze_version = 2;")
tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic';")
tk.MustExec("drop table if exists t4;")
tk.MustExec("create table t4(a int, b int) PARTITION BY HASH(a) PARTITIONS 2;")
tk.MustExec("insert into t4 values(1,1),(3,3),(4,4),(2,2),(5,5);")
Expand Down Expand Up @@ -632,7 +632,7 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) {
c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].QueryBytes(val), Equals, uint64(1))

// test analyzeIndexIncremental for global-level stats;
tk.MustExec("set @@tidb_analyze_version = 2;")
tk.MustExec("set @@session.tidb_analyze_version = 2;")
tk.MustExec("set @@tidb_partition_prune_mode = 'static';")
tk.MustExec("drop table if exists t;")
tk.MustExec(`create table t (a int, b int, primary key(a), index idx(b)) partition by range (a) (
Expand Down
12 changes: 12 additions & 0 deletions executor/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -2559,6 +2559,10 @@ func buildNoRangeTableReader(b *executorBuilder, v *plannercore.PhysicalTableRea
e.feedback = statistics.NewQueryFeedback(getFeedbackStatsTableID(e.ctx, tbl), ts.Hist, int64(ts.StatsCount()), ts.Desc)
}
collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(ts.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collect && tbl.Meta().Partition != nil {
collect = false
}
if !collect {
e.feedback.Invalidate()
}
Expand Down Expand Up @@ -2830,6 +2834,10 @@ func buildNoRangeIndexReader(b *executorBuilder, v *plannercore.PhysicalIndexRea
e.feedback = statistics.NewQueryFeedback(tblID, is.Hist, int64(is.StatsCount()), is.Desc)
}
collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collect && tbl.Meta().Partition != nil {
collect = false
}
if !collect {
e.feedback.Invalidate()
}
Expand Down Expand Up @@ -2968,6 +2976,10 @@ func buildNoRangeIndexLookUpReader(b *executorBuilder, v *plannercore.PhysicalIn
collectTable := false
e.tableRequest.CollectRangeCounts = &collectTable
collectIndex := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
// Do not collect feedback when the table is a partitioned table.
if collectIndex && tbl.Meta().Partition != nil {
collectIndex = false
}
if !collectIndex {
e.feedback.Invalidate()
}
Expand Down
94 changes: 94 additions & 0 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1718,6 +1718,100 @@ func (s *statsSerialSuite) TestGCIndexUsageInformation(c *C) {
tk.MustQuery(querySQL).Check(testkit.Rows("0"))
}

// TestFeedbackWithGlobalStats checks how query feedback interacts with
// analyze version 2 and with partitioned tables. It asserts that:
//   - tidb_analyze_version cannot be switched to 2 while the feedback
//     probability is 1 (a warning is reported and the value stays at 1);
//   - after feedback-triggering queries on a hash-partitioned table, the
//     table statistics returned by GetTableStats equal the ones captured
//     beforehand, both under analyze version 2 with dynamic prune mode and
//     under analyze version 1 with static prune mode.
func (s *statsSerialSuite) TestFeedbackWithGlobalStats(c *C) {
	defer cleanEnv(c, s.store, s.do)
	testKit := testkit.NewTestKit(c, s.store)
	testKit.MustExec("use test")
	testKit.MustExec("set @@tidb_analyze_version = 1")

	// Snapshot the package-level knobs and put them back once the test ends.
	savedProbability := statistics.FeedbackProbability.Load()
	savedMaxRanges := statistics.MaxNumberOfRanges
	savedMinLogScanCount := handle.MinLogScanCount
	savedMinLogErrorRate := handle.MinLogErrorRate
	defer func() {
		statistics.FeedbackProbability.Store(savedProbability)
		statistics.MaxNumberOfRanges = savedMaxRanges
		handle.MinLogScanCount = savedMinLogScanCount
		handle.MinLogErrorRate = savedMinLogErrorRate
	}()

	// execTimes runs the same statement `times` times in a row.
	execTimes := func(times int, sql string) {
		for n := times; n > 0; n-- {
			testKit.MustExec(sql)
		}
	}
	// execAll runs the given statements one after another.
	execAll := func(sqls ...string) {
		for _, sql := range sqls {
			testKit.MustExec(sql)
		}
	}

	// Case 1: with the feedback probability at 1, switching
	// tidb_analyze_version to 2 only produces a warning and the variable
	// keeps its old value.
	// Note: setting @@tidb_partition_prune_mode = 'dynamic' requires
	// tidb_analyze_version = 2 first; that is already covered elsewhere.
	statistics.FeedbackProbability.Store(1)
	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))
	testKit.MustExec("set @@tidb_analyze_version = 2")
	testKit.MustQuery("show warnings").Check(testkit.Rows(`Error 1105 variable tidb_analyze_version not updated because analyze version 2 is incompatible with query feedback. Please consider setting feedback-probability to 0.0 in config file to disable query feedback`))
	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))

	h := s.do.StatsHandle()
	// assertStatsUntouched pushes any collected feedback through the stats
	// handle and then requires the table statistics to equal `before`.
	// Feedback is expected to have no effect on a partitioned table in
	// either static or dynamic prune mode.
	assertStatsUntouched := func(before *statistics.Table, info *model.TableInfo) {
		h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
		c.Assert(h.DumpStatsFeedbackToKV(), IsNil)
		c.Assert(h.HandleUpdateStats(s.do.InfoSchema()), IsNil)
		assertTableEqual(c, before, h.GetTableStats(info))
	}

	// Case 2: analyze version 2 with dynamic prune mode (global-level stats).
	// Feedback is turned off first so that the version switch is accepted.
	statistics.FeedbackProbability.Store(0)
	testKit.MustExec("set @@tidb_analyze_version = 2")
	testKit.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("2"))
	testKit.MustExec("create table t (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2;")
	execTimes(200, "insert into t values (1,2),(2,2),(4,5),(2,3),(3,4)")
	testKit.MustExec("analyze table t with 0 topn")
	tbl, err := s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	c.Assert(err, IsNil)
	tblInfo := tbl.Meta()
	testKit.MustExec("analyze table t")
	c.Assert(h.Update(s.do.InfoSchema()), IsNil)
	statsBefore := h.GetTableStats(tblInfo)
	statistics.FeedbackProbability.Store(1)
	// Add more rows after analyze so the statistics become inaccurate.
	execTimes(200, "insert into t values (3,4), (3,4), (3,4), (3,4), (3,4)")
	// Run the queries that would normally trigger feedback.
	execAll(
		"select b from t partition(p0) use index(idx) where t.b <= 3;",
		"select b from t partition(p1) use index(idx) where t.b <= 3;",
		"select b from t use index(idx) where t.b <= 3 order by b;",
		"select b from t use index(idx) where t.b <= 3;",
	)
	assertStatsUntouched(statsBefore, tblInfo)

	// Case 3: analyze version 1 with static prune mode (partition-level stats).
	testKit.MustExec("set tidb_analyze_version = 1")
	testKit.MustExec("set @@tidb_partition_prune_mode = 'static';")
	testKit.MustExec("create table t1 (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2")
	execTimes(200, "insert into t1 values (1,2),(2,2),(4,5),(2,3),(3,4)")
	testKit.MustExec("analyze table t1 with 0 topn")
	// Add more rows after analyze so the statistics become inaccurate.
	execTimes(200, "insert into t1 values (3,4), (3,4), (3,4), (3,4), (3,4)")
	tbl, err = s.do.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t1"))
	c.Assert(err, IsNil)
	tblInfo = tbl.Meta()
	statsBefore = h.GetTableStats(tblInfo)
	// Run the queries that would normally trigger feedback.
	execAll(
		"select b from t1 partition(p0) use index(idx) where t1.b <= 3;",
		"select b from t1 partition(p1) use index(idx) where t1.b <= 3;",
		"select b from t1 use index(idx) where t1.b <= 3 order by b;",
		"select b from t1 use index(idx) where t1.b <= 3;",
	)
	assertStatsUntouched(statsBefore, tblInfo)
}

func (s *testStatsSuite) TestExtendedStatsPartitionTable(c *C) {
defer cleanEnv(c, s.store, s.do)
tk := testkit.NewTestKit(c, s.store)
Expand Down
4 changes: 4 additions & 0 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,10 @@ OUTER:
if !ok {
continue
}
if table.Meta().Partition != nil {
// Feedback is not applied to partitioned tables, so skip it.
continue
}
tblStats := h.GetPartitionStats(table.Meta(), fb.PhysicalID)
newTblStats := tblStats.Copy()
if fb.Tp == statistics.IndexType {
Expand Down
21 changes: 12 additions & 9 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -808,8 +808,8 @@ func (s *testStatsSuite) TestUpdatePartitionErrorRate(c *C) {
c.Assert(h.Update(is), IsNil)
tbl = h.GetPartitionStats(tblInfo, pid)

// The error rate of this column is not larger than MaxErrorRate now.
c.Assert(tbl.Columns[aID].NotAccurate(), IsFalse)
// Feedback will not take effect under partition table.
c.Assert(tbl.Columns[aID].NotAccurate(), IsTrue)
}

func appendBucket(h *statistics.Histogram, l, r int64) {
Expand Down Expand Up @@ -1014,6 +1014,7 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
handle.MinLogErrorRate = 0

h := s.do.StatsHandle()
// Feedback will not take effect under partition table.
tests := []struct {
sql string
hist string
Expand All @@ -1022,23 +1023,23 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
{
// test primary key feedback
sql: "select * from t where t.a <= 5",
hist: "column:1 ndv:2 totColSize:0\n" +
"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" +
"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0",
hist: "column:1 ndv:2 totColSize:2\n" +
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n" +
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0",
idxCols: 0,
},
{
// test index feedback by double read
sql: "select * from t use index(idx) where t.b <= 5",
hist: "index:1 ndv:1\n" +
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
idxCols: 1,
},
{
// test index feedback by single read
sql: "select b from t use index(idx) where t.b <= 5",
hist: "index:1 ndv:1\n" +
"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
idxCols: 1,
},
}
Expand Down Expand Up @@ -1215,10 +1216,11 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
pid := tblInfo.Partition.Definitions[0].ID
tbl := h.GetPartitionStats(tblInfo, pid)

// Feedback will not take effect under partition table.
c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+
"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0\n"+
"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0")
}

func (s *testStatsSuite) TestFeedbackWithStatsVer2(c *C) {
Expand Down Expand Up @@ -2085,6 +2087,7 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) {
testKit := testkit.NewTestKit(c, s.store)
testkit.WithPruneMode(testKit, variable.DynamicOnly, func() {
testKit.MustExec("use test")
testKit.MustExec("set @@tidb_analyze_version = 2;")
testKit.MustExec("drop table if exists t")
testKit.MustExec(`create table t (a int, b varchar(10), index idx_ab(a, b))
partition by range (a) (
Expand Down
2 changes: 1 addition & 1 deletion statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ func (s *testIntegrationSuite) TestGlobalStats(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t;")
tk.MustExec("set @@tidb_analyze_version = 2;")
tk.MustExec("set @@session.tidb_analyze_version = 2;")
tk.MustExec(`create table t (a int, key(a)) partition by range (a) (
partition p0 values less than (10),
partition p1 values less than (20),
Expand Down

0 comments on commit 168ef88

Please sign in to comment.