diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 48146fd029700..cad48db7936fe 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -82,7 +82,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 37, + shard_count = 38, deps = [ "//pkg/config", "//pkg/meta/model", diff --git a/pkg/statistics/column.go b/pkg/statistics/column.go index 6cd2d11a9a97a..5c6c66e239926 100644 --- a/pkg/statistics/column.go +++ b/pkg/statistics/column.go @@ -260,3 +260,13 @@ func (c *Column) StatsAvailable() bool { // StatsVer, so we check NDV > 0 || NullCount > 0 for the case. return c.IsAnalyzed() || c.NDV > 0 || c.NullCount > 0 } + +// EmptyColumn creates an empty column object. It may be used for pseudo estimation or to stop loading nonexistent stats. +func EmptyColumn(tid int64, pkIsHandle bool, colInfo *model.ColumnInfo) *Column { + return &Column{ + PhysicalID: tid, + Info: colInfo, + Histogram: *NewHistogram(colInfo.ID, 0, 0, 0, &colInfo.FieldType, 0, 0), + IsHandle: pkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), + } +} diff --git a/pkg/statistics/handle/storage/read.go b/pkg/statistics/handle/storage/read.go index 1a8a07381e134..79d1468c52f77 100644 --- a/pkg/statistics/handle/storage/read.go +++ b/pkg/statistics/handle/storage/read.go @@ -632,30 +632,38 @@ func CleanFakeItemsForShowHistInFlights(statsCache statstypes.StatsCache) int { } func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes.StatsHandle, col model.TableItemID, loadFMSketch bool, fullLoad bool) (err error) { - tbl, ok := statsHandle.Get(col.TableID) + statsTbl, ok := statsHandle.Get(col.TableID) if !ok { return nil } - - var colInfo *model.ColumnInfo - _, loadNeeded, analyzed := tbl.ColumnIsLoadNeeded(col.ID, true) - if !loadNeeded || !analyzed { - asyncload.AsyncLoadHistogramNeededItems.Delete(col) - return nil - } - // Now, we cannot init the column info in the ColAndIdxExistenceMap when to 
disable lite-init-stats. // so we have to get the column info from the domain. is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema) - tblInfo, ok := statsHandle.TableInfoByID(is, col.TableID) + tbl, ok := statsHandle.TableInfoByID(is, col.TableID) if !ok { return nil } - colInfo = tblInfo.Meta().GetColumnByID(col.ID) + tblInfo := tbl.Meta() + colInfo := tblInfo.GetColumnByID(col.ID) if colInfo == nil { asyncload.AsyncLoadHistogramNeededItems.Delete(col) return nil } + + _, loadNeeded, analyzed := statsTbl.ColumnIsLoadNeeded(col.ID, true) + if !loadNeeded || !analyzed { + // If this column is not analyzed yet and we don't have it in memory. + // We create a fake one for the pseudo estimation. + // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. + if loadNeeded && !analyzed { + fakeCol := statistics.EmptyColumn(tblInfo.ID, tblInfo.PKIsHandle, colInfo) + statsTbl.SetCol(col.ID, fakeCol) + statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil) + } + asyncload.AsyncLoadHistogramNeededItems.Delete(col) + return nil + } + hg, _, statsVer, _, err := HistMetaFromStorageWithHighPriority(sctx, &col, colInfo) if hg == nil || err != nil { asyncload.AsyncLoadHistogramNeededItems.Delete(col) @@ -690,29 +698,29 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsHandle statstypes. CMSketch: cms, TopN: topN, FMSketch: fms, - IsHandle: tblInfo.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), + IsHandle: tblInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), StatsVer: statsVer, } // Reload the latest stats cache, otherwise the `updateStatsCache` may fail with high probability, because functions // like `GetPartitionStats` called in `fmSketchFromStorage` would have modified the stats cache already. 
- tbl, ok = statsHandle.Get(col.TableID) + statsTbl, ok = statsHandle.Get(col.TableID) if !ok { return nil } - tbl = tbl.Copy() + statsTbl = statsTbl.Copy() if colHist.StatsAvailable() { if fullLoad { colHist.StatsLoadedStatus = statistics.NewStatsFullLoadStatus() } else { colHist.StatsLoadedStatus = statistics.NewStatsAllEvictedStatus() } - tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, colHist.LastUpdateVersion) if statsVer != statistics.Version0 { - tbl.StatsVer = int(statsVer) + statsTbl.LastAnalyzeVersion = max(statsTbl.LastAnalyzeVersion, colHist.LastUpdateVersion) + statsTbl.StatsVer = int(statsVer) } } - tbl.SetCol(col.ID, colHist) - statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil) + statsTbl.SetCol(col.ID, colHist) + statsHandle.UpdateStatsCache([]*statistics.Table{statsTbl}, nil) asyncload.AsyncLoadHistogramNeededItems.Delete(col) if col.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for column should already be loaded as sync but not found.", @@ -771,9 +779,9 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, is infoschema.InfoSchema tbl = tbl.Copy() if idxHist.StatsVer != statistics.Version0 { tbl.StatsVer = int(idxHist.StatsVer) + tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) } tbl.SetIdx(idx.ID, idxHist) - tbl.LastAnalyzeVersion = max(tbl.LastAnalyzeVersion, idxHist.LastUpdateVersion) statsHandle.UpdateStatsCache([]*statistics.Table{tbl}, nil) if idx.IsSyncLoadFailed { logutil.BgLogger().Warn("Hist for index should already be loaded as sync but not found.", diff --git a/pkg/statistics/handle/syncload/stats_syncload.go b/pkg/statistics/handle/syncload/stats_syncload.go index e9ff775a3fcea..03ec5fecf2ee7 100644 --- a/pkg/statistics/handle/syncload/stats_syncload.go +++ b/pkg/statistics/handle/syncload/stats_syncload.go @@ -341,13 +341,9 @@ func (s *statsSyncLoad) handleOneItemTask(task *statstypes.NeededItemTask) (err } // If this column is not analyzed yet and we don't have it in memory. 
// We create a fake one for the pseudo estimation. + // Otherwise, it will trigger the sync/async load again, even if the column has not been analyzed. if loadNeeded && !analyzed { - wrapper.col = &statistics.Column{ - PhysicalID: item.TableID, - Info: wrapper.colInfo, - Histogram: *statistics.NewHistogram(item.ID, 0, 0, 0, &wrapper.colInfo.FieldType, 0, 0), - IsHandle: isPkIsHandle && mysql.HasPriKeyFlag(wrapper.colInfo.GetFlag()), - } + wrapper.col = statistics.EmptyColumn(item.TableID, isPkIsHandle, wrapper.colInfo) s.updateCachedItem(tblInfo, item, wrapper.col, wrapper.idx, task.Item.FullLoad) return nil } diff --git a/pkg/statistics/integration_test.go b/pkg/statistics/integration_test.go index 8d55f04ab6bd5..6f498c125ec60 100644 --- a/pkg/statistics/integration_test.go +++ b/pkg/statistics/integration_test.go @@ -590,3 +590,25 @@ func TestGlobalIndexWithAnalyzeVersion1AndHistoricalStats(t *testing.T) { // Each analyze will only generate one record tk.MustQuery(fmt.Sprintf("select count(*) from mysql.stats_history where table_id=%d", tblID)).Equal(testkit.Rows("10")) } + +func TestLastAnalyzeVersionNotChangedWithAsyncStatsLoad(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("set @@tidb_stats_load_sync_wait = 0;") + tk.MustExec("use test") + tk.MustExec("create table t(a int, b int);") + require.NoError(t, dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh())) + require.NoError(t, dom.StatsHandle().Update(context.Background(), dom.InfoSchema())) + tk.MustExec("insert into t values (1, 1);") + err := dom.StatsHandle().DumpStatsDeltaToKV(true) + require.NoError(t, err) + tk.MustExec("alter table t add column c int default 1;") + dom.StatsHandle().HandleDDLEvent(<-dom.StatsHandle().DDLEventCh()) + tk.MustExec("select * from t where a = 1 or b = 1 or c = 1;") + require.NoError(t, dom.StatsHandle().LoadNeededHistograms(dom.InfoSchema())) + result := tk.MustQuery("show stats_meta 
where table_name = 't'") + require.Len(t, result.Rows(), 1) + // The last analyze time. + require.Equal(t, "", result.Rows()[0][6]) +} diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go index 04d7f394c80eb..804e115056c7b 100644 --- a/pkg/statistics/table.go +++ b/pkg/statistics/table.go @@ -811,7 +811,7 @@ func (t *Table) GetStatsHealthy() (int64, bool) { } // ColumnIsLoadNeeded checks whether the column needs trigger the async/sync load. -// The Column should be visible in the table and really has analyzed statistics in the stroage. +// The Column should be visible in the table and really has analyzed statistics in the storage. // Also, if the stats has been loaded into the memory, we also don't need to load it. // We return the Column together with the checking result, to avoid accessing the map multiple times. // The first bool is whether we need to load it into memory. The second bool is whether this column has stats in the system table or not. @@ -820,7 +820,7 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool return nil, false, false } // when we use non-lite init stats, it cannot init the stats for common columns. - // so we need to foce to load the stats. + // so we need to force to load the stats. col, ok := t.columns[id] if !ok { return nil, true, true @@ -828,15 +828,16 @@ func (t *Table) ColumnIsLoadNeeded(id int64, fullLoad bool) (*Column, bool, bool hasAnalyzed := t.ColAndIdxExistenceMap.HasAnalyzed(id, false) // If it's not analyzed yet. + // The real check condition: !ok && !hasAnalyzed. + // After this check, we will always have ok && hasAnalyzed. if !hasAnalyzed { return nil, false, false } // Restore the condition from the simplified form: - // 1. !ok && hasAnalyzed => need load - // 2. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load - // 3. 
ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load - if !ok || (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) { + // 1. ok && hasAnalyzed && fullLoad && !col.IsFullLoad => need load + // 2. ok && hasAnalyzed && !fullLoad && !col.statsInitialized => need load + if (fullLoad && !col.IsFullLoad()) || (!fullLoad && !col.statsInitialized) { return col, true, true }