Skip to content

Commit

Permalink
statistics: init LastAnalyzeVersion with snapshot timestamp (#54465)
Browse files Browse the repository at this point in the history
ref #53567
  • Loading branch information
Rustin170506 authored Jul 16, 2024
1 parent 5e872b9 commit 1bf27af
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 4 deletions.
1 change: 1 addition & 0 deletions pkg/statistics/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ type AnalyzeResults struct {
TableID AnalyzeTableID
Count int64
StatsVer int
// Snapshot is the snapshot timestamp when we start the analysis job.
Snapshot uint64
// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
BaseCount int64
Expand Down
3 changes: 2 additions & 1 deletion pkg/statistics/handle/autoanalyze/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ go_test(
timeout = "short",
srcs = ["autoanalyze_test.go"],
flaky = True,
shard_count = 13,
shard_count = 14,
deps = [
":autoanalyze",
"//pkg/domain",
"//pkg/domain/infosync",
"//pkg/parser/model",
"//pkg/parser/mysql",
Expand Down
15 changes: 14 additions & 1 deletion pkg/statistics/handle/autoanalyze/autoanalyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/domain"
"github.com/pingcap/tidb/pkg/domain/infosync"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
Expand Down Expand Up @@ -133,8 +134,20 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) {
// TestDisableAutoAnalyze runs the shared disableAutoAnalyzeCase scenario on a
// freshly created mock store and domain, without changing any system variable
// beforehand.
func TestDisableAutoAnalyze(t *testing.T) {
	mockStore, statsDom := testkit.CreateMockStoreAndDomain(t)
	kit := testkit.NewTestKit(t, mockStore)
	disableAutoAnalyzeCase(t, kit, statsDom)
}

// TestDisableAutoAnalyzeWithAnalyzeAllColumnsOptions runs the shared
// disableAutoAnalyzeCase scenario after setting the global
// tidb_analyze_column_options variable to 'ALL'.
func TestDisableAutoAnalyzeWithAnalyzeAllColumnsOptions(t *testing.T) {
	mockStore, statsDom := testkit.CreateMockStoreAndDomain(t)
	kit := testkit.NewTestKit(t, mockStore)
	// Switch tidb_analyze_column_options to ALL before the shared scenario runs.
	kit.MustExec("set global tidb_analyze_column_options='ALL'")
	disableAutoAnalyzeCase(t, kit, statsDom)
}

func disableAutoAnalyzeCase(t *testing.T, tk *testkit.TestKit, dom *domain.Domain) {
tk.MustExec("use test")
tk.MustExec("create table t (a int, index idx(a))")
tk.MustExec("create table t (a int)")
tk.MustExec("insert into t values (1)")
h := dom.StatsHandle()
err := h.HandleDDLEvent(<-h.DDLEventCh())
Expand Down
13 changes: 12 additions & 1 deletion pkg/statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,22 @@ func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSche
maxPhysicalID = max(physicalID, maxPhysicalID)
tableInfo := table.Meta()
newHistColl := *statistics.NewHistColl(physicalID, true, row.GetInt64(3), row.GetInt64(2), 4, 4)
snapshot := row.GetUint64(4)
tbl := &statistics.Table{
HistColl: newHistColl,
Version: row.GetUint64(0),
ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)),
IsPkIsHandle: tableInfo.PKIsHandle,
// During the initialization phase, we need to initialize LastAnalyzeVersion with the snapshot,
// which ensures that we don't duplicate the auto-analyze of a particular type of table.
// When the predicate columns feature is turned on, if a table has neither predicate columns nor indexes,
// then auto-analyze will only analyze the _row_id and refresh stats_meta,
// but we don't have any histograms or TopNs created for _row_id at the moment.
// So if we don't initialize LastAnalyzeVersion with the snapshot here,
// it will stay at 0 and auto-analyze won't be able to detect that the table has been analyzed.
// But in the future, we may create some records for _row_id, see:
// https://github.com/pingcap/tidb/issues/51098
LastAnalyzeVersion: snapshot,
}
cache.Put(physicalID, tbl) // put this table again since it is updated
}
Expand All @@ -90,7 +101,7 @@ func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSche

func (h *Handle) initStatsMeta(ctx context.Context, is infoschema.InfoSchema) (statstypes.StatsCache, error) {
ctx = kv.WithInternalSourceType(ctx, kv.InternalTxnStats)
sql := "select HIGH_PRIORITY version, table_id, modify_count, count from mysql.stats_meta"
sql := "select HIGH_PRIORITY version, table_id, modify_count, count, snapshot from mysql.stats_meta"
rc, err := util.Exec(h.initStatsCtx, sql)
if err != nil {
return nil, errors.Trace(err)
Expand Down
13 changes: 12 additions & 1 deletion pkg/statistics/handle/cache/statscache.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
if err := util.CallWithSCtx(s.statsHandle.SPool(), func(sctx sessionctx.Context) error {
rows, _, err = util.ExecRows(
sctx,
"SELECT version, table_id, modify_count, count from mysql.stats_meta where version > %? order by version",
"SELECT version, table_id, modify_count, count, snapshot from mysql.stats_meta where version > %? order by version",
lastVersion,
)
return err
Expand All @@ -90,6 +90,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
physicalID := row.GetInt64(1)
modifyCount := row.GetInt64(2)
count := row.GetInt64(3)
snapshot := row.GetUint64(4)

// Detect the context cancel signal, since it may take a long time for the loop.
// TODO: add context to TableInfoByID and remove this code block?
Expand Down Expand Up @@ -136,6 +137,16 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
tbl.RealtimeCount = count
tbl.ModifyCount = modifyCount
tbl.TblInfoUpdateTS = tableInfo.UpdateTS
// It only occurs in the following situations:
// 1. The table has already been analyzed,
// but because the predicate columns feature is turned on, and it doesn't have any columns or indexes analyzed,
// it only analyzes _row_id and refreshes stats_meta, in which case the snapshot is not zero.
// 2. LastAnalyzeVersion is 0 because it has never been loaded.
// In this case, we can initialize LastAnalyzeVersion to the snapshot,
// otherwise auto-analyze will assume that the table has never been analyzed and try to analyze it again.
if tbl.LastAnalyzeVersion == 0 && snapshot != 0 {
tbl.LastAnalyzeVersion = snapshot
}
tables = append(tables, tbl)
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ type Table struct {
HistColl
Version uint64
// LastAnalyzeVersion is the timestamp of the last analyze.
// It is used in auto-analyze to determine if this table has been analyzed.
// This field is populated from two sources:
// 1. Initialized by snapshot when loading stats_meta.
// 2. Updated by the analysis time of a specific column or index when loading the histogram of the column or index.
LastAnalyzeVersion uint64
// TblInfoUpdateTS is the UpdateTS of the TableInfo used when filling this struct.
// It is the schema version of the corresponding table. It is used to skip redundant
Expand Down

0 comments on commit 1bf27af

Please sign in to comment.