Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: init LastAnalyzeVersion with snapshot timestamp #54465

Merged
merged 7 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/statistics/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ type AnalyzeResults struct {
TableID AnalyzeTableID
Count int64
StatsVer int
// Snapshot is the snapshot timestamp when we start the analysis job.
Snapshot uint64
// BaseCount is the original count in mysql.stats_meta at the beginning of analyze.
BaseCount int64
Expand Down
3 changes: 2 additions & 1 deletion pkg/statistics/handle/autoanalyze/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ go_test(
timeout = "short",
srcs = ["autoanalyze_test.go"],
flaky = True,
shard_count = 13,
shard_count = 14,
deps = [
":autoanalyze",
"//pkg/domain",
"//pkg/domain/infosync",
"//pkg/parser/model",
"//pkg/parser/mysql",
Expand Down
15 changes: 14 additions & 1 deletion pkg/statistics/handle/autoanalyze/autoanalyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

"github.com/pingcap/failpoint"
"github.com/pingcap/tidb/pkg/domain"
"github.com/pingcap/tidb/pkg/domain/infosync"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
Expand Down Expand Up @@ -133,8 +134,20 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) {
// TestDisableAutoAnalyze builds a mock store and domain, then delegates to
// disableAutoAnalyzeCase without changing any system variables first.
func TestDisableAutoAnalyze(t *testing.T) {
	mockStore, statsDom := testkit.CreateMockStoreAndDomain(t)
	disableAutoAnalyzeCase(t, testkit.NewTestKit(t, mockStore), statsDom)
}

// TestDisableAutoAnalyzeWithAnalyzeAllColumnsOptions runs disableAutoAnalyzeCase
// after setting the global tidb_analyze_column_options variable to 'ALL'.
func TestDisableAutoAnalyzeWithAnalyzeAllColumnsOptions(t *testing.T) {
	mockStore, statsDom := testkit.CreateMockStoreAndDomain(t)
	kit := testkit.NewTestKit(t, mockStore)

	// The option must be in place before the shared case creates its table.
	kit.MustExec("set global tidb_analyze_column_options='ALL'")

	disableAutoAnalyzeCase(t, kit, statsDom)
}

func disableAutoAnalyzeCase(t *testing.T, tk *testkit.TestKit, dom *domain.Domain) {
tk.MustExec("use test")
tk.MustExec("create table t (a int, index idx(a))")
tk.MustExec("create table t (a int)")
tk.MustExec("insert into t values (1)")
h := dom.StatsHandle()
err := h.HandleDDLEvent(<-h.DDLEventCh())
Expand Down
13 changes: 12 additions & 1 deletion pkg/statistics/handle/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,22 @@ func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSche
maxPhysicalID = max(physicalID, maxPhysicalID)
tableInfo := table.Meta()
newHistColl := *statistics.NewHistColl(physicalID, true, row.GetInt64(3), row.GetInt64(2), 4, 4)
snapshot := row.GetUint64(4)
tbl := &statistics.Table{
HistColl: newHistColl,
Version: row.GetUint64(0),
ColAndIdxExistenceMap: statistics.NewColAndIndexExistenceMap(len(tableInfo.Columns), len(tableInfo.Indices)),
IsPkIsHandle: tableInfo.PKIsHandle,
// During the initialization phase, we need to initialize LastAnalyzeVersion with the snapshot,
// which ensures that we don't duplicate the auto-analyze of a particular type of table.
// When the predicate columns feature is turned on, if a table has neither predicate columns nor indexes,
// then auto-analyze will only analyze the _row_id and refresh stats_meta,
// and no histograms or TopNs are created for _row_id at the moment.
// So if we don't initialize LastAnalyzeVersion with the snapshot here,
// it will stay at 0 and auto-analyze won't be able to detect that the table has been analyzed.
// In the future, we may create some records for _row_id, see:
// https://github.com/pingcap/tidb/issues/51098
LastAnalyzeVersion: snapshot,
}
cache.Put(physicalID, tbl) // put this table again since it is updated
}
Expand All @@ -90,7 +101,7 @@ func (h *Handle) initStatsMeta4Chunk(ctx context.Context, is infoschema.InfoSche

func (h *Handle) initStatsMeta(ctx context.Context, is infoschema.InfoSchema) (statstypes.StatsCache, error) {
ctx = kv.WithInternalSourceType(ctx, kv.InternalTxnStats)
sql := "select HIGH_PRIORITY version, table_id, modify_count, count from mysql.stats_meta"
sql := "select HIGH_PRIORITY version, table_id, modify_count, count, snapshot from mysql.stats_meta"
rc, err := util.Exec(h.initStatsCtx, sql)
if err != nil {
return nil, errors.Trace(err)
Expand Down
13 changes: 12 additions & 1 deletion pkg/statistics/handle/cache/statscache.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
if err := util.CallWithSCtx(s.statsHandle.SPool(), func(sctx sessionctx.Context) error {
rows, _, err = util.ExecRows(
sctx,
"SELECT version, table_id, modify_count, count from mysql.stats_meta where version > %? order by version",
"SELECT version, table_id, modify_count, count, snapshot from mysql.stats_meta where version > %? order by version",
lastVersion,
)
return err
Expand All @@ -90,6 +90,7 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
physicalID := row.GetInt64(1)
modifyCount := row.GetInt64(2)
count := row.GetInt64(3)
snapshot := row.GetUint64(4)

// Detect the context cancel signal, since it may take a long time for the loop.
// TODO: add context to TableInfoByID and remove this code block?
Expand Down Expand Up @@ -136,6 +137,16 @@ func (s *StatsCacheImpl) Update(ctx context.Context, is infoschema.InfoSchema) e
tbl.RealtimeCount = count
tbl.ModifyCount = modifyCount
tbl.TblInfoUpdateTS = tableInfo.UpdateTS
// The condition below (LastAnalyzeVersion is 0 but snapshot is not) only holds in the following situations:
// 1. The table has already been analyzed,
// but because the predicate columns feature is turned on, and it doesn't have any columns or indexes analyzed,
// it only analyzes _row_id and refreshes stats_meta, in which case the snapshot is not zero.
// 2. LastAnalyzeVersion is 0 because it has never been loaded.
// In this case, we can initialize LastAnalyzeVersion to the snapshot,
// otherwise auto-analyze will assume that the table has never been analyzed and try to analyze it again.
if tbl.LastAnalyzeVersion == 0 && snapshot != 0 {
tbl.LastAnalyzeVersion = snapshot
}
Rustin170506 marked this conversation as resolved.
Show resolved Hide resolved
tables = append(tables, tbl)
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/statistics/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ type Table struct {
HistColl
Version uint64
// LastAnalyzeVersion is the timestamp of the last analyze.
// We use it in auto-analyze to determine if this table has been analyzed.
// This field is populated from two sources:
// 1. Initialized by snapshot when loading stats_meta.
// 2. Updated by the analysis time of a specific column or index when loading the histogram of the column or index.
LastAnalyzeVersion uint64
// TblInfoUpdateTS is the UpdateTS of the TableInfo used when filling this struct.
// It is the schema version of the corresponding table. It is used to skip redundant
Expand Down