Skip to content

Commit

Permalink
statistics: do not analyze table when auto analyze ratio is 0 (#51467)
Browse files Browse the repository at this point in the history
ref #50132
  • Loading branch information
Rustin170506 authored Mar 4, 2024
1 parent ac057eb commit 62afab3
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 41 deletions.
2 changes: 1 addition & 1 deletion pkg/statistics/handle/autoanalyze/refresher/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ go_test(
timeout = "short",
srcs = ["refresher_test.go"],
flaky = True,
shard_count = 9,
shard_count = 10,
deps = [
":refresher",
"//pkg/parser/model",
Expand Down
14 changes: 13 additions & 1 deletion pkg/statistics/handle/autoanalyze/refresher/refresher.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,8 @@ func CreateTableAnalysisJob(
indexes := CheckIndexesNeedAnalyze(tblInfo, tblStats)

// No need to analyze.
// We perform a separate check because users may set the auto analyze ratio to 0,
// yet still wish to analyze newly added indexes and tables that have not been analyzed.
if changePercentage == 0 && len(indexes) == 0 {
return nil
}
Expand Down Expand Up @@ -295,6 +297,13 @@ func CalculateChangePercentage(
return unanalyzedTableDefaultChangePercentage
}

// Auto analyze based on the change percentage is disabled.
// However, this check should not affect the analysis of indexes,
// as index analysis is still needed for query performance.
if autoAnalyzeRatio == 0 {
return 0
}

tblCnt := float64(tblStats.RealtimeCount)
if histCnt := tblStats.GetAnalyzeRowCount(); histCnt > 0 {
tblCnt = histCnt
Expand Down Expand Up @@ -406,6 +415,8 @@ func createTableAnalysisJobForPartitions(
partitionStats,
)
// No need to analyze.
// We perform a separate check because users may set the auto analyze ratio to 0,
// yet still wish to analyze newly added indexes and partitions that have not been analyzed.
if len(partitionNames) == 0 && len(partitionIndexes) == 0 {
return nil
}
Expand Down Expand Up @@ -452,7 +463,8 @@ func CalculateIndicatorsForPartitions(
for _, def := range defs {
tblStats := partitionStats[def.ID]
changePercent := CalculateChangePercentage(tblStats, autoAnalyzeRatio)
// No need to analyze the partition because it doesn't meet the threshold or stats are not loaded yet.
// Skip partition analysis if it doesn't meet the threshold, stats are not yet loaded,
// or the auto analyze ratio is set to 0 by the user.
if changePercent == 0 {
continue
}
Expand Down
126 changes: 87 additions & 39 deletions pkg/statistics/handle/autoanalyze/refresher/refresher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,62 +30,110 @@ import (
"github.com/tikv/client-go/v2/oracle"
)

func TestPickOneTableAndAnalyzeByPriority(t *testing.T) {
func TestSkipAnalyzeTableWhenAutoAnalyzeRatioIsZero(t *testing.T) {
exec.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
}()

store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t1 (a int, b int, index idx(a)) " +
"partition by range (a) " +
"(partition p0 values less than (2), " +
"partition p1 values less than (4), " +
"partition p2 values less than (16))",
)

tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))")
tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (4))")
tk.MustExec("create table t2 (a int, b int, index idx(a)) " +
"partition by range (a) " +
"(partition p0 values less than (2), " +
"partition p1 values less than (4), " +
"partition p2 values less than (16))",
)
tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)")
tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)")
// Set the auto analyze ratio to 0.
tk.MustExec("set global tidb_auto_analyze_ratio = 0")
handle := dom.StatsHandle()
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
// Analyze those tables first.
tk.MustExec("analyze table t1")
tk.MustExec("analyze table t2")
// Insert more data into t1.
tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)")
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
sysProcTracker := dom.SysProcTracker()
r := refresher.NewRefresher(handle, sysProcTracker)
r.RebuildTableAnalysisJobQueue()
// No jobs are added.
require.Equal(t, 0, r.Jobs.Len())
require.False(t, r.PickOneTableAndAnalyzeByPriority())
// Enable the auto analyze.
tk.MustExec("set global tidb_auto_analyze_ratio = 0.2")
r.RebuildTableAnalysisJobQueue()
// Exactly one job is added: only t1 received new rows after the manual analyze.
require.Equal(t, 1, r.Jobs.Len())
require.True(t, r.PickOneTableAndAnalyzeByPriority())
}

func TestPickOneTableAndAnalyzeByPriority(t *testing.T) {
exec.AutoAnalyzeMinCnt = 0
defer func() {
exec.AutoAnalyzeMinCnt = 1000
}()

store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test")
tk.MustExec("create table t1 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))")
tk.MustExec("create table t2 (a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (2), partition p1 values less than (14))")
tk.MustExec("insert into t1 values (1, 1), (2, 2), (3, 3)")
tk.MustExec("insert into t2 values (1, 1), (2, 2), (3, 3)")
handle := dom.StatsHandle()
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
// Analyze those tables first.
tk.MustExec("analyze table t1")
tk.MustExec("analyze table t2")
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
// Insert more data into t1 and t2, but more data is inserted into t1.
tk.MustExec("insert into t1 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10), (11, 11), (12, 12), (13, 13)")
tk.MustExec("insert into t2 values (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)")
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
sysProcTracker := dom.SysProcTracker()
r := refresher.NewRefresher(handle, sysProcTracker)
// No jobs in the queue.
r.PickOneTableAndAnalyzeByPriority()
// The table is not analyzed.
is := dom.InfoSchema()
tbl1, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t1"))
r.RebuildTableAnalysisJobQueue()
require.Equal(t, 2, r.Jobs.Len())
// Analyze t1 first.
require.True(t, r.PickOneTableAndAnalyzeByPriority())
require.NoError(t, handle.DumpStatsDeltaToKV(true))
require.NoError(t, handle.Update(dom.InfoSchema()))
// The table is analyzed.
tbl1, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t1"))
require.NoError(t, err)
pid1 := tbl1.Meta().GetPartitionInfo().Definitions[0].ID
pid1 := tbl1.Meta().GetPartitionInfo().Definitions[1].ID
tblStats1 := handle.GetPartitionStats(tbl1.Meta(), pid1)
require.True(t, tblStats1.Pseudo)

// Add a job to the queue.
job1 := &priorityqueue.TableAnalysisJob{
TableID: tbl1.Meta().ID,
TableSchema: "test",
TableName: "t1",
ChangePercentage: 0.5,
Weight: 1,
}
r.Jobs.Push(job1)
tbl2, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t2"))
require.NoError(t, err)
job2 := &priorityqueue.TableAnalysisJob{
TableID: tbl2.Meta().ID,
TableSchema: "test",
TableName: "t2",
ChangePercentage: 0.5,
Weight: 0.9,
}
r.Jobs.Push(job2)
r.PickOneTableAndAnalyzeByPriority()
// The table is analyzed.
tblStats1 = handle.GetPartitionStats(tbl1.Meta(), pid1)
require.False(t, tblStats1.Pseudo)
require.Equal(t, int64(0), tblStats1.ModifyCount)
require.Equal(t, int64(12), tblStats1.RealtimeCount)
// t2 is not analyzed.
pid2 := tbl2.Meta().GetPartitionInfo().Definitions[0].ID
tbl2, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t2"))
require.NoError(t, err)
pid2 := tbl2.Meta().GetPartitionInfo().Definitions[1].ID
tblStats2 := handle.GetPartitionStats(tbl2.Meta(), pid2)
require.True(t, tblStats2.Pseudo)
require.Equal(t, int64(6), tblStats2.ModifyCount)
// Do one more round.
r.PickOneTableAndAnalyzeByPriority()
require.True(t, r.PickOneTableAndAnalyzeByPriority())
// t2 is analyzed.
pid2 = tbl2.Meta().GetPartitionInfo().Definitions[0].ID
pid2 = tbl2.Meta().GetPartitionInfo().Definitions[1].ID
tblStats2 = handle.GetPartitionStats(tbl2.Meta(), pid2)
require.False(t, tblStats2.Pseudo)
require.Equal(t, int64(0), tblStats2.ModifyCount)
require.Equal(t, int64(8), tblStats2.RealtimeCount)
}

func TestPickOneTableAndAnalyzeByPriorityWithFailedAnalysis(t *testing.T) {
Expand Down

0 comments on commit 62afab3

Please sign in to comment.