statistics: test the auto analyze and feedback for the global-level stats (#23181)
pingcap · Mar 10, 2021 · 168ef88 · 168ef88
1 parent 219aea4
commit 168ef88
Show file tree

Hide file tree

Showing 6 changed files with 126 additions and 13 deletions.
diff --git a/executor/analyze_test.go b/executor/analyze_test.go
@@ -508,8 +508,8 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) {
 	*/
 
 	// test fast analyze in dynamic mode
-	tk.MustExec("set @@tidb_analyze_version = 2;")
-	tk.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
+	tk.MustExec("set @@session.tidb_analyze_version = 2;")
+	tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic';")
 	tk.MustExec("drop table if exists t4;")
 	tk.MustExec("create table t4(a int, b int) PARTITION BY HASH(a) PARTITIONS 2;")
 	tk.MustExec("insert into t4 values(1,1),(3,3),(4,4),(2,2),(5,5);")
@@ -632,7 +632,7 @@ func (s *testSuite1) testAnalyzeIncremental(tk *testkit.TestKit, c *C) {
 	c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].QueryBytes(val), Equals, uint64(1))
 
 	// test analyzeIndexIncremental for global-level stats;
-	tk.MustExec("set @@tidb_analyze_version = 2;")
+	tk.MustExec("set @@session.tidb_analyze_version = 2;")
 	tk.MustExec("set @@tidb_partition_prune_mode = 'static';")
 	tk.MustExec("drop table if exists t;")
 	tk.MustExec(`create table t (a int, b int, primary key(a), index idx(b)) partition by range (a) (

diff --git a/executor/builder.go b/executor/builder.go
@@ -2559,6 +2559,10 @@ func buildNoRangeTableReader(b *executorBuilder, v *plannercore.PhysicalTableRea
 		e.feedback = statistics.NewQueryFeedback(getFeedbackStatsTableID(e.ctx, tbl), ts.Hist, int64(ts.StatsCount()), ts.Desc)
 	}
 	collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(ts.Ranges))
+	// Do not collect feedback when the table is a partitioned table.
+	if collect && tbl.Meta().Partition != nil {
+		collect = false
+	}
 	if !collect {
 		e.feedback.Invalidate()
 	}
@@ -2830,6 +2834,10 @@ func buildNoRangeIndexReader(b *executorBuilder, v *plannercore.PhysicalIndexRea
 		e.feedback = statistics.NewQueryFeedback(tblID, is.Hist, int64(is.StatsCount()), is.Desc)
 	}
 	collect := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
+	// Do not collect feedback when the table is a partitioned table.
+	if collect && tbl.Meta().Partition != nil {
+		collect = false
+	}
 	if !collect {
 		e.feedback.Invalidate()
 	}
@@ -2968,6 +2976,10 @@ func buildNoRangeIndexLookUpReader(b *executorBuilder, v *plannercore.PhysicalIn
 	collectTable := false
 	e.tableRequest.CollectRangeCounts = &collectTable
 	collectIndex := statistics.CollectFeedback(b.ctx.GetSessionVars().StmtCtx, e.feedback, len(is.Ranges))
+	// Do not collect feedback when the table is a partitioned table.
+	if collectIndex && tbl.Meta().Partition != nil {
+		collectIndex = false
+	}
 	if !collectIndex {
 		e.feedback.Invalidate()
 	}

diff --git a/statistics/handle/handle_test.go b/statistics/handle/handle_test.go
@@ -1718,6 +1718,100 @@ func (s *statsSerialSuite) TestGCIndexUsageInformation(c *C) {
 	tk.MustQuery(querySQL).Check(testkit.Rows("0"))
 }
 
+func (s *statsSerialSuite) TestFeedbackWithGlobalStats(c *C) {
+	defer cleanEnv(c, s.store, s.do)
+	testKit := testkit.NewTestKit(c, s.store)
+	testKit.MustExec("use test")
+	testKit.MustExec("set @@tidb_analyze_version = 1")
+
+	oriProbability := statistics.FeedbackProbability.Load()
+	oriNumber := statistics.MaxNumberOfRanges
+	oriMinLogCount := handle.MinLogScanCount
+	oriErrorRate := handle.MinLogErrorRate
+	defer func() {
+		statistics.FeedbackProbability.Store(oriProbability)
+		statistics.MaxNumberOfRanges = oriNumber
+		handle.MinLogScanCount = oriMinLogCount
+		handle.MinLogErrorRate = oriErrorRate
+	}()
+	// Case 1: You can't set tidb_analyze_version to 2 if feedback is enabled.
+	// Note: to set @@tidb_partition_prune_mode = 'dynamic', tidb_analyze_version must be set to 2 first. This has already been tested.
+	statistics.FeedbackProbability.Store(1)
+	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))
+	testKit.MustExec("set @@tidb_analyze_version = 2")
+	testKit.MustQuery("show warnings").Check(testkit.Rows(`Error 1105 variable tidb_analyze_version not updated because analyze version 2 is incompatible with query feedback. Please consider setting feedback-probability to 0.0 in config file to disable query feedback`))
+	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1"))
+
+	h := s.do.StatsHandle()
+	var err error
+	// checkFeedbackOnPartitionTable is used to check whether the statistics are the same as before.
+	checkFeedbackOnPartitionTable := func(statsBefore *statistics.Table, tblInfo *model.TableInfo) {
+		h.UpdateStatsByLocalFeedback(s.do.InfoSchema())
+		err = h.DumpStatsFeedbackToKV()
+		c.Assert(err, IsNil)
+		err = h.HandleUpdateStats(s.do.InfoSchema())
+		c.Assert(err, IsNil)
+		statsTblAfter := h.GetTableStats(tblInfo)
+		// Assert that the statistics have not changed:
+		// feedback does not work for partitioned tables in either static or dynamic mode.
+		assertTableEqual(c, statsBefore, statsTblAfter)
+	}
+
+	// Case 2: Feedback wouldn't be applied on version 2 and global-level statistics.
+	statistics.FeedbackProbability.Store(0)
+	testKit.MustExec("set @@tidb_analyze_version = 2")
+	testKit.MustExec("set @@tidb_partition_prune_mode = 'dynamic';")
+	testKit.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("2"))
+	testKit.MustExec("create table t (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2;")
+	for i := 0; i < 200; i++ {
+		testKit.MustExec("insert into t values (1,2),(2,2),(4,5),(2,3),(3,4)")
+	}
+	testKit.MustExec("analyze table t with 0 topn")
+	is := s.do.InfoSchema()
+	table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	c.Assert(err, IsNil)
+	tblInfo := table.Meta()
+	testKit.MustExec("analyze table t")
+	err = h.Update(s.do.InfoSchema())
+	c.Assert(err, IsNil)
+	statsTblBefore := h.GetTableStats(tblInfo)
+	statistics.FeedbackProbability.Store(1)
+	// make the statistics inaccurate.
+	for i := 0; i < 200; i++ {
+		testKit.MustExec("insert into t values (3,4), (3,4), (3,4), (3,4), (3,4)")
+	}
+	// trigger feedback
+	testKit.MustExec("select b from t partition(p0) use index(idx) where t.b <= 3;")
+	testKit.MustExec("select b from t partition(p1) use index(idx) where t.b <= 3;")
+	testKit.MustExec("select b from t use index(idx) where t.b <= 3 order by b;")
+	testKit.MustExec("select b from t use index(idx) where t.b <= 3;")
+	checkFeedbackOnPartitionTable(statsTblBefore, tblInfo)
+
+	// Case 3: Feedback is also not effective on version 1 and partition-level statistics.
+	testKit.MustExec("set tidb_analyze_version = 1")
+	testKit.MustExec("set @@tidb_partition_prune_mode = 'static';")
+	testKit.MustExec("create table t1 (a bigint(64), b bigint(64), index idx(b)) PARTITION BY HASH(a) PARTITIONS 2")
+	for i := 0; i < 200; i++ {
+		testKit.MustExec("insert into t1 values (1,2),(2,2),(4,5),(2,3),(3,4)")
+	}
+	testKit.MustExec("analyze table t1 with 0 topn")
+	// make the statistics inaccurate.
+	for i := 0; i < 200; i++ {
+		testKit.MustExec("insert into t1 values (3,4), (3,4), (3,4), (3,4), (3,4)")
+	}
+	is = s.do.InfoSchema()
+	table, err = is.TableByName(model.NewCIStr("test"), model.NewCIStr("t1"))
+	c.Assert(err, IsNil)
+	tblInfo = table.Meta()
+	statsTblBefore = h.GetTableStats(tblInfo)
+	// trigger feedback
+	testKit.MustExec("select b from t1 partition(p0) use index(idx) where t1.b <= 3;")
+	testKit.MustExec("select b from t1 partition(p1) use index(idx) where t1.b <= 3;")
+	testKit.MustExec("select b from t1 use index(idx) where t1.b <= 3 order by b;")
+	testKit.MustExec("select b from t1 use index(idx) where t1.b <= 3;")
+	checkFeedbackOnPartitionTable(statsTblBefore, tblInfo)
+}
+
 func (s *testStatsSuite) TestExtendedStatsPartitionTable(c *C) {
 	defer cleanEnv(c, s.store, s.do)
 	tk := testkit.NewTestKit(c, s.store)

diff --git a/statistics/handle/update.go b/statistics/handle/update.go
@@ -585,6 +585,10 @@ OUTER:
 			if !ok {
 				continue
 			}
+			if table.Meta().Partition != nil {
+				// If the table is a partitioned table, the feedback will not work.
+				continue
+			}
 			tblStats := h.GetPartitionStats(table.Meta(), fb.PhysicalID)
 			newTblStats := tblStats.Copy()
 			if fb.Tp == statistics.IndexType {

diff --git a/statistics/handle/update_test.go b/statistics/handle/update_test.go
@@ -808,8 +808,8 @@ func (s *testStatsSuite) TestUpdatePartitionErrorRate(c *C) {
 	c.Assert(h.Update(is), IsNil)
 	tbl = h.GetPartitionStats(tblInfo, pid)
 
-	// The error rate of this column is not larger than MaxErrorRate now.
-	c.Assert(tbl.Columns[aID].NotAccurate(), IsFalse)
+	// Feedback does not take effect on partitioned tables.
+	c.Assert(tbl.Columns[aID].NotAccurate(), IsTrue)
 }
 
 func appendBucket(h *statistics.Histogram, l, r int64) {
@@ -1014,6 +1014,7 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
 	handle.MinLogErrorRate = 0
 
 	h := s.do.StatsHandle()
+	// Feedback does not take effect on partitioned tables.
 	tests := []struct {
 		sql     string
 		hist    string
@@ -1022,23 +1023,23 @@ func (s *testStatsSuite) TestQueryFeedbackForPartition(c *C) {
 		{
 			// test primary key feedback
 			sql: "select * from t where t.a <= 5",
-			hist: "column:1 ndv:2 totColSize:0\n" +
-				"num: 1 lower_bound: -9223372036854775808 upper_bound: 2 repeats: 0 ndv: 0\n" +
-				"num: 1 lower_bound: 2 upper_bound: 5 repeats: 0 ndv: 0",
+			hist: "column:1 ndv:2 totColSize:2\n" +
+				"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n" +
+				"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0",
 			idxCols: 0,
 		},
 		{
 			// test index feedback by double read
 			sql: "select * from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
+				"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
 			idxCols: 1,
 		},
 		{
 			// test index feedback by single read
 			sql: "select b from t use index(idx) where t.b <= 5",
 			hist: "index:1 ndv:1\n" +
-				"num: 2 lower_bound: -inf upper_bound: 6 repeats: 0 ndv: 0",
+				"num: 2 lower_bound: 2 upper_bound: 2 repeats: 2 ndv: 0",
 			idxCols: 1,
 		},
 	}
@@ -1215,10 +1216,11 @@ func (s *testStatsSuite) TestUpdatePartitionStatsByLocalFeedback(c *C) {
 	pid := tblInfo.Partition.Definitions[0].ID
 	tbl := h.GetPartitionStats(tblInfo, pid)
 
+	// Feedback does not take effect on partitioned tables.
 	c.Assert(tbl.Columns[tblInfo.Columns[0].ID].ToString(0), Equals, "column:1 ndv:3 totColSize:0\n"+
 		"num: 1 lower_bound: 1 upper_bound: 1 repeats: 1 ndv: 0\n"+
-		"num: 2 lower_bound: 2 upper_bound: 4 repeats: 0 ndv: 0\n"+
-		"num: 1 lower_bound: 4 upper_bound: 9223372036854775807 repeats: 0 ndv: 0")
+		"num: 1 lower_bound: 2 upper_bound: 2 repeats: 1 ndv: 0\n"+
+		"num: 1 lower_bound: 4 upper_bound: 4 repeats: 1 ndv: 0")
 }
 
 func (s *testStatsSuite) TestFeedbackWithStatsVer2(c *C) {
@@ -2085,6 +2087,7 @@ func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) {
 	testKit := testkit.NewTestKit(c, s.store)
 	testkit.WithPruneMode(testKit, variable.DynamicOnly, func() {
 		testKit.MustExec("use test")
+		testKit.MustExec("set @@tidb_analyze_version = 2;")
 		testKit.MustExec("drop table if exists t")
 		testKit.MustExec(`create table t (a int, b varchar(10), index idx_ab(a, b))
 					partition by range (a) (

diff --git a/statistics/integration_test.go b/statistics/integration_test.go
@@ -230,7 +230,7 @@ func (s *testIntegrationSuite) TestGlobalStats(c *C) {
 	tk := testkit.NewTestKit(c, s.store)
 	tk.MustExec("use test")
 	tk.MustExec("drop table if exists t;")
-	tk.MustExec("set @@tidb_analyze_version = 2;")
+	tk.MustExec("set @@session.tidb_analyze_version = 2;")
 	tk.MustExec(`create table t (a int, key(a)) partition by range (a) (
 		partition p0 values less than (10),
 		partition p1 values less than (20),