From 62afab372b240d74cd0c00826a3534852bb35c06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Mon, 4 Mar 2024 16:15:33 +0800 Subject: [PATCH] statistics: do not analyze table when auto analyze ratio is 0 (#51467) ref pingcap/tidb#50132 --- .../handle/autoanalyze/refresher/BUILD.bazel | 2 +- .../handle/autoanalyze/refresher/refresher.go | 14 +- .../autoanalyze/refresher/refresher_test.go | 126 ++++++++++++------ 3 files changed, 101 insertions(+), 41 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel index 9245626b6b976..f3b524783dd70 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel +++ b/pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel @@ -28,7 +28,7 @@ go_test( timeout = "short", srcs = ["refresher_test.go"], flaky = True, - shard_count = 9, + shard_count = 10, deps = [ ":refresher", "//pkg/parser/model", diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher.go b/pkg/statistics/handle/autoanalyze/refresher/refresher.go index 6322ec96f54a9..275ffc7aff5e1 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher.go @@ -260,6 +260,8 @@ func CreateTableAnalysisJob( indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats) // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. if changePercentage == 0 && len(indexes) == 0 { return nil } @@ -295,6 +297,13 @@ func CalculateChangePercentage( return unanalyzedTableDefaultChangePercentage } + // Auto analyze based on the change percentage is disabled. + // However, this check should not affect the analysis of indexes, + // as index analysis is still needed for query performance. + if autoAnalyzeRatio == 0 { + return 0 + } + tblCnt := float64(tblStats.RealtimeCount) if histCnt := tblStats.GetAnalyzeRowCount(); histCnt > 0 { tblCnt = histCnt @@ -406,6 +415,8 @@ func createTableAnalysisJobForPartitions( partitionStats, ) // No need to analyze. + // We perform a separate check because users may set the auto analyze ratio to 0, + // yet still wish to analyze newly added indexes and tables that have not been analyzed. if len(partitionNames) == 0 && len(partitionIndexes) == 0 { return nil } @@ -452,7 +463,8 @@ func CalculateIndicatorsForPartitions( for _, def := range defs { tblStats := partitionStats[def.ID] changePercent := CalculateChangePercentage(tblStats, autoAnalyzeRatio) - // No need to analyze the partition because it doesn't meet the threshold or stats are not loaded yet. + // Skip partition analysis if it doesn't meet the threshold, stats are not yet loaded, + // or the auto analyze ratio is set to 0 by the user. if changePercent == 0 { continue } diff --git a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go index b7d77c15f8161..27f5490459be9 100644 --- a/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go +++ b/pkg/statistics/handle/autoanalyze/refresher/refresher_test.go @@ -30,62 +30,110 @@ import ( "github.com/tikv/client-go/v2/oracle" ) -func TestPickOneTableAndAnalyzeByPriority(t *testing.T) { +func TestSkipAnalyzeTableWhenAutoAnalyzeRatioIsZero(t *testing.T) { + exec.AutoAnalyzeMinCnt = 0 + defer func() { + exec.AutoAnalyzeMinCnt = 1000 + }() + store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) " + + "partition by range (a) " + + "(partition p0 values less than (2), " + + "partition p1 values less than (4), " + + "partition p2 values less than (16))", + ) - tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))") - tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) " + + "partition by range (a) " + + "(partition p0 values less than (2), " + + "partition p1 values less than (4), " + + "partition p2 values less than (16))", + ) tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") + // Set the auto analyze ratio to 0. + tk.MustExec("set global tidb_auto_analyze_ratio = 0") + handle := dom.StatsHandle() + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) + // Analyze those tables first. + tk.MustExec("analyze table t1") + tk.MustExec("analyze table t2") + // Insert more data into t1. + tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) + sysProcTracker := dom.SysProcTracker() + r := refresher.NewRefresher(handle, sysProcTracker) + r.RebuildTableAnalysisJobQueue() + // No jobs are added. + require.Equal(t, 0, r.Jobs.Len()) + require.False(t, r.PickOneTableAndAnalyzeByPriority()) + // Enable the auto analyze. + tk.MustExec("set global tidb_auto_analyze_ratio = 0.2") + r.RebuildTableAnalysisJobQueue() + // Jobs are added. + require.Equal(t, 1, r.Jobs.Len()) + require.True(t, r.PickOneTableAndAnalyzeByPriority()) +} + +func TestPickOneTableAndAnalyzeByPriority(t *testing.T) { + exec.AutoAnalyzeMinCnt = 0 + defer func() { + exec.AutoAnalyzeMinCnt = 1000 + }() + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))") + tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)") + tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)") handle := dom.StatsHandle() + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) + // Analyze those tables first. + tk.MustExec("analyze table t1") + tk.MustExec("analyze table t2") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) + // Insert more data into t1 and t2, but more data is inserted into t1. + tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13)") + tk.MustExec("insert into t2 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)") + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) sysProcTracker := dom.SysProcTracker() r := refresher.NewRefresher(handle, sysProcTracker) - // No jobs in the queue. - r.PickOneTableAndAnalyzeByPriority() - // The table is not analyzed. - is := dom.InfoSchema() - tbl1, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t1")) + r.RebuildTableAnalysisJobQueue() + require.Equal(t, 2, r.Jobs.Len()) + // Analyze t1 first. + require.True(t, r.PickOneTableAndAnalyzeByPriority()) + require.NoError(t, handle.DumpStatsDeltaToKV(true)) + require.NoError(t, handle.Update(dom.InfoSchema())) + // The table is analyzed. + tbl1, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t1")) require.NoError(t, err) - pid1 := tbl1.Meta().GetPartitionInfo().Definitions[0].ID + pid1 := tbl1.Meta().GetPartitionInfo().Definitions[1].ID tblStats1 := handle.GetPartitionStats(tbl1.Meta(), pid1) - require.True(t, tblStats1.Pseudo) - - // Add a job to the queue. - job1 := &priorityqueue.TableAnalysisJob{ - TableID: tbl1.Meta().ID, - TableSchema: "test", - TableName: "t1", - ChangePercentage: 0.5, - Weight: 1, - } - r.Jobs.Push(job1) - tbl2, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t2")) - require.NoError(t, err) - job2 := &priorityqueue.TableAnalysisJob{ - TableID: tbl2.Meta().ID, - TableSchema: "test", - TableName: "t2", - ChangePercentage: 0.5, - Weight: 0.9, - } - r.Jobs.Push(job2) - r.PickOneTableAndAnalyzeByPriority() - // The table is analyzed. - tblStats1 = handle.GetPartitionStats(tbl1.Meta(), pid1) - require.False(t, tblStats1.Pseudo) + require.Equal(t, int64(0), tblStats1.ModifyCount) + require.Equal(t, int64(12), tblStats1.RealtimeCount) // t2 is not analyzed. - pid2 := tbl2.Meta().GetPartitionInfo().Definitions[0].ID + tbl2, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t2")) + require.NoError(t, err) + pid2 := tbl2.Meta().GetPartitionInfo().Definitions[1].ID tblStats2 := handle.GetPartitionStats(tbl2.Meta(), pid2) - require.True(t, tblStats2.Pseudo) + require.Equal(t, int64(6), tblStats2.ModifyCount) // Do one more round. - r.PickOneTableAndAnalyzeByPriority() + require.True(t, r.PickOneTableAndAnalyzeByPriority()) // t2 is analyzed. - pid2 = tbl2.Meta().GetPartitionInfo().Definitions[0].ID + pid2 = tbl2.Meta().GetPartitionInfo().Definitions[1].ID tblStats2 = handle.GetPartitionStats(tbl2.Meta(), pid2) - require.False(t, tblStats2.Pseudo) + require.Equal(t, int64(0), tblStats2.ModifyCount) + require.Equal(t, int64(8), tblStats2.RealtimeCount) } func TestPickOneTableAndAnalyzeByPriorityWithFailedAnalysis(t *testing.T) {