Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: respect tidb_analyze_column_options when build analyze plan #54240

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 1 addition & 14 deletions pkg/executor/test/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,6 @@ func TestSavedAnalyzeColumnOptions(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal4))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")

tk.MustExec("use test")
tk.MustExec("set @@session.tidb_analyze_version = 2")
Expand Down Expand Up @@ -1151,7 +1150,6 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where a > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1219,7 +1217,6 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where c > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1296,7 +1293,6 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where c > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1378,7 +1374,6 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where a < 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1502,7 +1497,6 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where a < 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1606,7 +1600,6 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1676,7 +1669,6 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1759,7 +1751,6 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b > 1")
require.NoError(t, h.DumpColStatsUsageToKV())
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows()
Expand Down Expand Up @@ -1835,10 +1826,8 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
tk.MustExec("analyze table t predicate columns")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`,
"Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed",
"Warning 1105 No predicate column has been collected yet for table test.t, so only indexes and the columns composing the indexes will be analyzed",
))
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows()
require.Equal(t, 2, len(rows))

for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} {
func(choice model.ColumnChoice) {
Expand All @@ -1853,7 +1842,6 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where b > 1")
require.NoError(t, dom.StatsHandle().DumpColStatsUsageToKV())
tk.MustExec("analyze table t predicate columns")
Expand Down Expand Up @@ -2167,7 +2155,6 @@ func TestShowAanalyzeStatusJobInfo(t *testing.T) {
}
checkJobInfo("analyze table columns b, c, d with 2 buckets, 2 topn, 1 samplerate")
tk.MustExec("set global tidb_persist_analyze_options = 1")
tk.MustExec("set global tidb_enable_column_tracking = 1")
tk.MustExec("select * from t where c > 1")
h := dom.StatsHandle()
require.NoError(t, h.DumpColStatsUsageToKV())
Expand Down
72 changes: 57 additions & 15 deletions pkg/planner/core/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1962,6 +1962,7 @@ func (b *PlanBuilder) getMustAnalyzedColumns(tbl *ast.TableName, cols *calcOnceM
return cols.data, nil
}

// getPredicateColumns gets the columns used in predicates.
func (b *PlanBuilder) getPredicateColumns(tbl *ast.TableName, cols *calcOnceMap) (map[int64]struct{}, error) {
// Already calculated in the previous call.
if cols.calculated {
Expand All @@ -1976,10 +1977,13 @@ func (b *PlanBuilder) getPredicateColumns(tbl *ast.TableName, cols *calcOnceMap)
return nil, err
}
if len(colList) == 0 {
b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackErrorf("No predicate column has been collected yet for table %s.%s so all columns are analyzed", tbl.Schema.L, tbl.Name.L))
for _, colInfo := range tblInfo.Columns {
cols.data[colInfo.ID] = struct{}{}
}
b.ctx.GetSessionVars().StmtCtx.AppendWarning(
errors.NewNoStackErrorf(
"No predicate column has been collected yet for table %s.%s, so only indexes and the columns composing the indexes will be analyzed",
tbl.Schema.L,
tbl.Name.L,
),
)
} else {
for _, id := range colList {
cols.data[id] = struct{}{}
Expand Down Expand Up @@ -2017,22 +2021,40 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(
}

switch columnChoice {
case model.DefaultChoice, model.AllColumns:
return tbl.TableInfo.Columns, nil, nil
case model.PredicateColumns:
if mustAllColumns {
case model.DefaultChoice:
columnOptions := variable.AnalyzeColumnOptions.Load()
switch columnOptions {
case model.AllColumns.String():
return tbl.TableInfo.Columns, nil, nil
case model.PredicateColumns.String():
columns, err := b.getColumnsBasedOnPredicateColumns(
tbl,
predicateCols,
mustAnalyzedCols,
mustAllColumns,
)
if err != nil {
return nil, nil, err
}
return columns, nil, nil
default:
// Usually, this won't happen.
logutil.BgLogger().Warn("Unknown default column choice, analyze all columns", zap.String("choice", columnOptions))
return tbl.TableInfo.Columns, nil, nil
}
predicate, err := b.getPredicateColumns(tbl, predicateCols)
if err != nil {
return nil, nil, err
}
mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols)
case model.AllColumns:
return tbl.TableInfo.Columns, nil, nil
case model.PredicateColumns:
columns, err := b.getColumnsBasedOnPredicateColumns(
tbl,
predicateCols,
mustAnalyzedCols,
mustAllColumns,
)
if err != nil {
return nil, nil, err
}
colSet := combineColumnSets(predicate, mustAnalyzed)
return getColumnListFromSet(tbl.TableInfo.Columns, colSet), nil, nil
return columns, nil, nil
case model.ColumnList:
colSet := getColumnSetFromSpecifiedCols(specifiedCols)
mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols)
Expand All @@ -2058,6 +2080,26 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo(
return nil, nil, nil
}

// getColumnsBasedOnPredicateColumns picks the columns to analyze for the
// predicate-columns choice.
//
// When rewriteAllStatsNeeded is true, every column of the table is returned
// and no predicate lookup is performed. Otherwise the predicate columns and
// the must-analyzed columns are resolved (each through its calcOnceMap, so a
// previous calculation can be reused), merged into a single set, and turned
// into a column list by getColumnListFromSet.
//
// Any error from resolving either column set is returned unchanged, with a
// nil column list.
func (b *PlanBuilder) getColumnsBasedOnPredicateColumns(
	tbl *ast.TableName,
	predicateCols, mustAnalyzedCols *calcOnceMap,
	rewriteAllStatsNeeded bool,
) ([]*model.ColumnInfo, error) {
	// Short-circuit: the caller asked for the full column list.
	if rewriteAllStatsNeeded {
		return tbl.TableInfo.Columns, nil
	}

	predicateSet, err := b.getPredicateColumns(tbl, predicateCols)
	if err != nil {
		return nil, err
	}

	mustAnalyzedSet, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols)
	if err != nil {
		return nil, err
	}

	// Union of both sets, materialized as a list over the table's columns.
	return getColumnListFromSet(
		tbl.TableInfo.Columns,
		combineColumnSets(predicateSet, mustAnalyzedSet),
	), nil
}

// Helper function to combine two column sets.
func combineColumnSets(sets ...map[int64]struct{}) map[int64]struct{} {
result := make(map[int64]struct{})
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/handle/autoanalyze/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ go_test(
timeout = "short",
srcs = ["autoanalyze_test.go"],
flaky = True,
shard_count = 12,
shard_count = 13,
deps = [
":autoanalyze",
"//pkg/domain/infosync",
Expand Down
39 changes: 39 additions & 0 deletions pkg/statistics/handle/autoanalyze/autoanalyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,45 @@ func TestAutoAnalyzeLockedTable(t *testing.T) {
require.True(t, dom.StatsHandle().HandleAutoAnalyze())
}

// TestAutoAnalyzeWithPredicateColumns verifies that, once
// tidb_analyze_column_options is set to PREDICATE, an auto analyze job on a
// table only lists the column that was used in a predicate.
func TestAutoAnalyzeWithPredicateColumns(t *testing.T) {
	// Create a table and add it to stats cache.
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	tk.MustExec("use test")
	tk.MustExec("create table t (a int, b int)")
	tk.MustExec("insert into t values (1, 1)")
	// Only column a appears in a predicate, so only a should be tracked.
	tk.MustQuery("select * from t where a > 0").Check(testkit.Rows("1 1"))
	h := dom.StatsHandle()
	err := h.HandleDDLEvent(<-h.DDLEventCh())
	require.NoError(t, err)
	// Persist the collected column usage and the row-count delta, then
	// refresh the stats handle from the current info schema.
	require.NoError(t, h.DumpColStatsUsageToKV())
	require.NoError(t, h.DumpStatsDeltaToKV(true))
	is := dom.InfoSchema()
	require.NoError(t, h.Update(is))

	// Drop the auto-analyze minimum count to 0 for the duration of the test
	// (presumably so the one-row table is eligible for auto analyze).
	// Restore whatever value was in effect before, rather than a hard-coded
	// constant, so the test does not leak state if the default ever changes.
	originalMinCnt := exec.AutoAnalyzeMinCnt
	exec.AutoAnalyzeMinCnt = 0
	defer func() {
		exec.AutoAnalyzeMinCnt = originalMinCnt
	}()

	// Check column_stats_usage: exactly one used column, and it is a.
	rows := tk.MustQuery(
		"show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null",
	).Rows()
	require.Equal(t, 1, len(rows))
	require.Equal(t, "a", rows[0][3])

	// Set tidb_analyze_column_options to PREDICATE.
	tk.MustExec("set global tidb_analyze_column_options='PREDICATE'")

	// Trigger auto analyze.
	require.True(t, dom.StatsHandle().HandleAutoAnalyze())

	// Check analyze jobs: the latest job must analyze column a only.
	tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check(
		testkit.Rows("t auto analyze table columns a with 256 buckets, 100 topn, 1 samplerate"),
	)
}

func TestDisableAutoAnalyze(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)
tk := testkit.NewTestKit(t, store)
Expand Down
2 changes: 0 additions & 2 deletions pkg/statistics/handle/updatetest/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,6 @@ func TestDumpColumnStatsUsage(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")

h := dom.StatsHandle()
tk.MustExec("use test")
Expand Down Expand Up @@ -987,7 +986,6 @@ func TestCollectPredicateColumnsFromExecute(t *testing.T) {
defer func() {
tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal2))
}()
tk.MustExec("set global tidb_enable_column_tracking = 1")

h := dom.StatsHandle()
tk.MustExec("use test")
Expand Down
2 changes: 1 addition & 1 deletion pkg/statistics/handle/usage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ go_test(
],
embed = [":usage"],
flaky = True,
shard_count = 3,
shard_count = 10,
deps = [
"//pkg/infoschema",
"//pkg/parser/model",
Expand Down
Loading