Skip to content

Commit

Permalink
statistics: make sure PQ can analyze all indexes with stats version 1 (
Browse files Browse the repository at this point in the history
  • Loading branch information
Rustin170506 authored Nov 11, 2024
1 parent e16613d commit 1b49096
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -244,8 +244,10 @@ func (j *DynamicPartitionedTableAnalysisJob) analyzePartitionIndexes(
sysProcTracker sysproctrack.Tracker,
) (success bool) {
analyzePartitionBatchSize := int(variable.AutoAnalyzePartitionBatchSize.Load())
// For version 2, analyzing one index also analyzes all other indexes and columns.
// For version 1, analyzing one index only analyzes the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion

OnlyPickOneIndex:
for indexName, partitionNames := range j.PartitionIndexes {
needAnalyzePartitionNames := make([]any, 0, len(partitionNames))
for _, partition := range partitionNames {
Expand All @@ -262,10 +264,16 @@ OnlyPickOneIndex:
params := append([]any{j.TableSchema, j.GlobalTableName}, needAnalyzePartitionNames[start:end]...)
params = append(params, indexName)
success = exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...)
if !success {
return false
}
}
// For version 1, we need to analyze all indexes.
if analyzeVersion != 1 {
// Halt execution after analyzing one index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same partition.
break OnlyPickOneIndex
break
}
}
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ func (j *NonPartitionedTableAnalysisJob) analyzeIndexes(
if len(j.Indexes) == 0 {
return true
}
// For version 2, analyzing one index also analyzes all other indexes and columns.
// For version 1, analyzing one index only analyzes the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion
if analyzeVersion == 1 {
for _, index := range j.Indexes {
sql, params := j.GenSQLForAnalyzeIndex(index)
if !exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...) {
return false
}
}
return true
}
// Only analyze the first index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same table.
Expand Down
10 changes: 2 additions & 8 deletions pkg/statistics/handle/autoanalyze/priorityqueue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ func (pq *AnalysisPriorityQueue) ProcessDMLChanges() {

// Only update if we've seen a newer version
if newMaxVersion > lastFetchTimestamp {
statslogutil.StatsLogger().Info("Updating last fetch timestamp", zap.Uint64("new_max_version", newMaxVersion))
queueSamplerLogger().Info("Updating last fetch timestamp", zap.Uint64("new_max_version", newMaxVersion))
pq.syncFields.lastDMLUpdateFetchTimestamp = newMaxVersion
}
return nil
Expand Down Expand Up @@ -404,12 +404,6 @@ func (pq *AnalysisPriorityQueue) processTableStats(
return errors.Trace(err)
}
jobFactory := NewAnalysisJobFactory(sctx, autoAnalyzeRatio, currentTs)
// Check whether the table needs to be analyzed.
// Note: Unanalyzed tables will also be considered.
changePercent := jobFactory.CalculateChangePercentage(stats)
if changePercent == 0 {
return nil
}
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
pruneMode := variable.PartitionPruneMode(sctx.GetSessionVars().PartitionPruneMode.Load())

Expand Down Expand Up @@ -455,14 +449,14 @@ func (pq *AnalysisPriorityQueue) tryCreateJob(
}

tableInfo, ok := pq.statsHandle.TableInfoByID(is, stats.PhysicalID)
tableMeta := tableInfo.Meta()
if !ok {
statslogutil.StatsLogger().Warn(
"Table info not found for table id",
zap.Int64("tableID", stats.PhysicalID),
)
return nil
}
tableMeta := tableInfo.Meta()
schemaName, ok := is.SchemaNameByTableID(tableMeta.ID)
if !ok {
statslogutil.StatsLogger().Warn(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -902,3 +902,48 @@ func TestVectorIndexTriggerAutoAnalyze(t *testing.T) {
// No event is found
require.Nil(t, addIndexEvent)
}

// TestAddIndexTriggerAutoAnalyzeWithStatsVersion1 verifies that, with
// tidb_analyze_version set to 1, running the analysis job taken from the
// priority queue leaves every index of a partitioned table analyzed,
// including the indexes that were added after the last manual ANALYZE.
func TestAddIndexTriggerAutoAnalyzeWithStatsVersion1(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("set @@global.tidb_analyze_version=1;")
	tk.MustExec("use test")
	tk.MustExec("create table t (c1 int, c2 int, index idx(c1, c2)) partition by range columns (c1) (partition p0 values less than (5), partition p1 values less than (10))")

	ctx := context.Background()
	tbl, err := dom.InfoSchema().TableByName(ctx, pmodel.NewCIStr("test"), pmodel.NewCIStr("t"))
	require.NoError(t, err)
	tblInfo := tbl.Meta()
	statsHandle := dom.StatsHandle()

	// Collect stats once so the table is already analyzed before any changes.
	tk.MustExec("analyze table t")
	require.NoError(t, statsHandle.Update(ctx, dom.InfoSchema()))

	// Write a few rows and flush the modification counters.
	tk.MustExec("insert into t values (1,2),(2,2)")
	require.NoError(t, statsHandle.DumpStatsDeltaToKV(true))
	require.NoError(t, statsHandle.Update(ctx, dom.InfoSchema()))

	// Create two more indexes; these have no stats yet.
	for _, ddl := range []string{
		"alter table t add index idx1(c1)",
		"alter table t add index idx2(c2)",
	} {
		tk.MustExec(ddl)
	}

	// Drop the minimum-count threshold for the duration of the test.
	statistics.AutoAnalyzeMinCnt = 0
	defer func() {
		statistics.AutoAnalyzeMinCnt = 1000
	}()

	queue := priorityqueue.NewAnalysisPriorityQueue(statsHandle)
	defer queue.Close()
	require.NoError(t, queue.Initialize())

	empty, err := queue.IsEmpty()
	require.NoError(t, err)
	require.False(t, empty)

	head, err := queue.Peek()
	require.NoError(t, err)
	require.Equal(t, tblInfo.ID, head.GetTableID())
	require.NoError(t, head.Analyze(statsHandle, dom.SysProcTracker()))

	// Every index must be analyzed after the job has run.
	// NOTE(review): IDs 1-3 presumably map to idx, idx1 and idx2 — confirm.
	tblStats := statsHandle.GetTableStats(tblInfo)
	for _, idxID := range []int64{1, 2, 3} {
		require.True(t, tblStats.GetIdx(idxID).IsAnalyzed())
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,18 @@ func (j *StaticPartitionedTableAnalysisJob) analyzeStaticPartitionIndexes(
if len(j.Indexes) == 0 {
return true
}
// For version 2, analyzing one index also analyzes all other indexes and columns.
// For version 1, analyzing one index only analyzes the specified index.
analyzeVersion := sctx.GetSessionVars().AnalyzeVersion
if analyzeVersion == 1 {
for _, index := range j.Indexes {
sql, params := j.GenSQLForAnalyzeStaticPartitionIndex(index)
if !exec.AutoAnalyze(sctx, statsHandle, sysProcTracker, j.TableStatsVer, sql, params...) {
return false
}
}
return true
}
// Only analyze the first index.
// This is because analyzing a single index also analyzes all other indexes and columns.
// Therefore, to avoid redundancy, we prevent multiple analyses of the same partition.
Expand Down

0 comments on commit 1b49096

Please sign in to comment.