From 7e83932f13238dc399651a87468d3eee7d444b2d Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 5 Jan 2024 16:17:04 +0800 Subject: [PATCH] statistics: avoid allocating the memory when to auto analyze with pseudo table (#50099) close pingcap/tidb#50100 --- .../handle/autoanalyze/autoanalyze.go | 6 ++--- pkg/statistics/handle/handle.go | 22 ++++++++++++++----- pkg/statistics/handle/types/interfaces.go | 3 +++ 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/pkg/statistics/handle/autoanalyze/autoanalyze.go b/pkg/statistics/handle/autoanalyze/autoanalyze.go index 9e1fd89ed956e..f59e90d28fdbd 100644 --- a/pkg/statistics/handle/autoanalyze/autoanalyze.go +++ b/pkg/statistics/handle/autoanalyze/autoanalyze.go @@ -414,7 +414,7 @@ func RandomPickOneTableAndTryAutoAnalyze( pi := tblInfo.GetPartitionInfo() // No partitions, analyze the whole table. if pi == nil { - statsTbl := statsHandle.GetTableStats(tblInfo) + statsTbl := statsHandle.GetTableStatsForAutoAnalyze(tblInfo) sql := "analyze table %n.%n" analyzed := tryAutoAnalyzeTable(sctx, statsHandle, sysProcTracker, tblInfo, statsTbl, autoAnalyzeRatio, sql, db, tblInfo.Name.O) if analyzed { @@ -491,10 +491,10 @@ func tryAutoAnalyzeTable( sql string, params ...interface{}, ) bool { - // 1. If the stats are not loaded, we don't need to analyze it. + // 1. If the statistics are either not loaded or are classified as pseudo, there is no need to analyze the table. // 2. If the table is too small, we don't want to waste time to analyze it. // Leave the opportunity to other bigger tables. 
- if statsTbl.Pseudo || statsTbl.RealtimeCount < AutoAnalyzeMinCnt { + if statsTbl == nil || statsTbl.RealtimeCount < AutoAnalyzeMinCnt || statsTbl.Pseudo { return false } diff --git a/pkg/statistics/handle/handle.go b/pkg/statistics/handle/handle.go index 82872c1ebb72e..dd523bb8a7a8a 100644 --- a/pkg/statistics/handle/handle.go +++ b/pkg/statistics/handle/handle.go @@ -149,9 +149,18 @@ func (h *Handle) GetTableStats(tblInfo *model.TableInfo) *statistics.Table { return h.GetPartitionStats(tblInfo, tblInfo.ID) } +// GetTableStatsForAutoAnalyze is to get table stats but it will return nil instead of a pseudo table when the stats are not in the cache. +func (h *Handle) GetTableStatsForAutoAnalyze(tblInfo *model.TableInfo) *statistics.Table { + return h.getPartitionStats(tblInfo, tblInfo.ID, false) +} + // GetPartitionStats retrieves the partition stats from cache. // TODO: remove GetTableStats later on. func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *statistics.Table { + return h.getPartitionStats(tblInfo, pid, true) +} + +func (h *Handle) getPartitionStats(tblInfo *model.TableInfo, pid int64, returnPseudo bool) *statistics.Table { var tbl *statistics.Table if h == nil { tbl = statistics.PseudoTable(tblInfo, false) @@ -160,12 +169,15 @@ func (h *Handle) GetPartitionStats(tblInfo *model.TableInfo, pid int64) *statist } tbl, ok := h.Get(pid) if !ok { - tbl = statistics.PseudoTable(tblInfo, false) - tbl.PhysicalID = pid - if tblInfo.GetPartitionInfo() == nil || h.Len() < 64 { - h.UpdateStatsCache([]*statistics.Table{tbl}, nil) + if returnPseudo { + tbl = statistics.PseudoTable(tblInfo, false) + tbl.PhysicalID = pid + if tblInfo.GetPartitionInfo() == nil || h.Len() < 64 { + h.UpdateStatsCache([]*statistics.Table{tbl}, nil) + } + return tbl } - return tbl + return nil } return tbl } diff --git a/pkg/statistics/handle/types/interfaces.go b/pkg/statistics/handle/types/interfaces.go index d8eb1c7663ac7..b7b7a65fd3130 100644 --- a/pkg/statistics/handle/types/interfaces.go +++ b/pkg/statistics/handle/types/interfaces.go @@ 
-430,6 +430,9 @@ type StatsHandle interface { // GetTableStats retrieves the statistics table from cache, and the cache will be updated by a goroutine. GetTableStats(tblInfo *model.TableInfo) *statistics.Table + // GetTableStatsForAutoAnalyze retrieves the statistics table from cache, but it will not return pseudo. + GetTableStatsForAutoAnalyze(tblInfo *model.TableInfo) *statistics.Table + // GetPartitionStats retrieves the partition stats from cache. GetPartitionStats(tblInfo *model.TableInfo, pid int64) *statistics.Table