From 0a1c21376731d4be1cac5d0ca45319c5dcc254da Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 16:27:38 +0800 Subject: [PATCH 01/18] planner: respect `tidb_analyze_default_column_choice` when build analyze job Signed-off-by: hi-rustin fix Signed-off-by: hi-rustin --- pkg/planner/core/planbuilder.go | 61 +++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 3187a257fad28..a84747b5e85c4 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -1962,6 +1962,7 @@ func (b *PlanBuilder) getMustAnalyzedColumns(tbl *ast.TableName, cols *calcOnceM return cols.data, nil } +// getPredicateColumns gets the columns used in predicates. func (b *PlanBuilder) getPredicateColumns(tbl *ast.TableName, cols *calcOnceMap) (map[int64]struct{}, error) { // Already calculated in the previous call. if cols.calculated { @@ -2017,22 +2018,40 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo( } switch columnChoice { - case model.DefaultChoice, model.AllColumns: - return tbl.TableInfo.Columns, nil, nil - case model.PredicateColumns: - if mustAllColumns { + case model.DefaultChoice: + columnOptions := variable.AnalyzeColumnOptions.Load() + switch columnOptions { + case model.AllColumns.String(): + return tbl.TableInfo.Columns, nil, nil + case model.PredicateColumns.String(): + columns, err := b.getColumnsBasedOnPredicateColumns( + tbl, + predicateCols, + mustAnalyzedCols, + mustAllColumns, + ) + if err != nil { + return nil, nil, err + } + return columns, nil, nil + default: + // Usually, this won't happen. 
+ logutil.BgLogger().Warn("Unknown default column choice", zap.String("choice", columnOptions)) return tbl.TableInfo.Columns, nil, nil } - predicate, err := b.getPredicateColumns(tbl, predicateCols) - if err != nil { - return nil, nil, err - } - mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) + case model.AllColumns: + return tbl.TableInfo.Columns, nil, nil + case model.PredicateColumns: + columns, err := b.getColumnsBasedOnPredicateColumns( + tbl, + predicateCols, + mustAnalyzedCols, + mustAllColumns, + ) if err != nil { return nil, nil, err } - colSet := combineColumnSets(predicate, mustAnalyzed) - return getColumnListFromSet(tbl.TableInfo.Columns, colSet), nil, nil + return columns, nil, nil case model.ColumnList: colSet := getColumnSetFromSpecifiedCols(specifiedCols) mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) @@ -2058,6 +2077,26 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo( return nil, nil, nil } +func (b *PlanBuilder) getColumnsBasedOnPredicateColumns( + tbl *ast.TableName, + predicateCols, mustAnalyzedCols *calcOnceMap, + rewriteAllStatsNeeded bool, +) ([]*model.ColumnInfo, error) { + if rewriteAllStatsNeeded { + return tbl.TableInfo.Columns, nil + } + predicate, err := b.getPredicateColumns(tbl, predicateCols) + if err != nil { + return nil, err + } + mustAnalyzed, err := b.getMustAnalyzedColumns(tbl, mustAnalyzedCols) + if err != nil { + return nil, err + } + colSet := combineColumnSets(predicate, mustAnalyzed) + return getColumnListFromSet(tbl.TableInfo.Columns, colSet), nil +} + // Helper function to combine two column sets. 
func combineColumnSets(sets ...map[int64]struct{}) map[int64]struct{} { result := make(map[int64]struct{}) From 9e1c200a8c18bfbccbf67429e02aa02068ac4ccd Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 17:12:01 +0800 Subject: [PATCH 02/18] test: add case for analyzing with predicate Signed-off-by: hi-rustin --- .../handle/usage/predicate_column_test.go | 48 ++++++++++++++++++- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index d3e0a40b25d1f..e437300ed8376 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -30,8 +30,6 @@ func TestCleanupPredicateColumns(t *testing.T) { tk.MustExec("use test") tk.MustExec("create table t (a int, b int)") tk.MustExec("insert into t values (1, 1), (2, 2), (3, 3)") - // Enable column tracking. - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2", "3 3")) tk.MustQuery("select * from t where b > 1").Check(testkit.Rows("2 2", "3 3")) @@ -54,3 +52,49 @@ func TestCleanupPredicateColumns(t *testing.T) { require.NoError(t, err) require.Len(t, columns, 1) } + +func TestAnalyzeTableWithPredicateColumns(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Create table and select data with predicate. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2 2", "3 3 3")) + + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Set tidb_analyze_column_options to PREDICATE. 
+ tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Analyze table and check analyze jobs. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a with 256 buckets, 100 topn, 1 samplerate"), + ) + + // More columns. + tk.MustQuery("select * from t where b > 1").Check(testkit.Rows("2 2 2", "3 3 3")) + + // Dump the statistics usage. + err = h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze again. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) + + // Set tidb_analyze_column_options to ALL. + tk.MustExec("set global tidb_analyze_column_options='ALL'") + // Analyze again. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) +} From b29e96d2731ae21cea273315b56a6789140c5bb4 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 17:54:49 +0800 Subject: [PATCH 03/18] test: add TestAnalyzeTableWithTiDBPersistAnalyzeOptionsEnabled Signed-off-by: hi-rustin --- .../handle/usage/predicate_column_test.go | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index e437300ed8376..c63792a4ad5ee 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -98,3 +98,47 @@ func TestAnalyzeTableWithPredicateColumns(t *testing.T) { testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), ) } + +func TestAnalyzeTableWithTiDBPersistAnalyzeOptionsEnabled(t 
*testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Check tidb_persist_analyze_options first. + tk.MustQuery("select @@tidb_persist_analyze_options").Check(testkit.Rows("1")) + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + // Create table and select data with predicate. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2 2", "3 3 3")) + + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze table with specified columns. + tk.MustExec("analyze table t columns a, b") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) + + // Analyze again, it should use the same options. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) + + // Analyze table with ALL syntax. + tk.MustExec("analyze table t all columns") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) + + // Analyze again, it should use the same options. 
+ tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) +} From b3fd184ac9ab837ae68a1b4a558db56b2517381d Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 18:06:17 +0800 Subject: [PATCH 04/18] test: add TestAnalyzeTableWithTiDBPersistAnalyzeOptionsDisabled Signed-off-by: hi-rustin --- .../handle/usage/predicate_column_test.go | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index c63792a4ad5ee..1a0e884c9d734 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -142,3 +142,39 @@ func TestAnalyzeTableWithTiDBPersistAnalyzeOptionsEnabled(t *testing.T) { testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), ) } + +func TestAnalyzeTableWithTiDBPersistAnalyzeOptionsDisabled(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Check tidb_persist_analyze_options first. + tk.MustQuery("select @@tidb_persist_analyze_options").Check(testkit.Rows("1")) + // Disable tidb_persist_analyze_options. + tk.MustExec("set global tidb_persist_analyze_options = 0") + tk.MustQuery("select @@tidb_persist_analyze_options").Check(testkit.Rows("0")) + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Create table and select data with predicate. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2 2", "3 3 3")) + + // Dump the statistics usage. 
+ h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze table with specified columns. + tk.MustExec("analyze table t columns a, b") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) + + // Analyze again, it should use the predicate columns. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a with 256 buckets, 100 topn, 1 samplerate"), + ) +} From bdd9398f4f06744742585dcbadf00a967f351e7b Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 18:22:40 +0800 Subject: [PATCH 05/18] test: add TestAnalyzeTableWhenV1StatsExists Signed-off-by: hi-rustin --- .../handle/usage/predicate_column_test.go | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index 1a0e884c9d734..fe8817e1e9021 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -178,3 +178,41 @@ func TestAnalyzeTableWithTiDBPersistAnalyzeOptionsDisabled(t *testing.T) { testkit.Rows("t analyze table columns a with 256 buckets, 100 topn, 1 samplerate"), ) } + +func TestAnalyzeTableWhenV1StatsExists(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Create table and select data with predicate. 
+ tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + tk.MustQuery("select * from t where a > 1").Check(testkit.Rows("2 2 2", "3 3 3")) + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Set tidb_analyze_version to 1. + tk.MustExec("set tidb_analyze_version = 1") + tk.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("1")) + + // Analyze table. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze columns"), + ) + + // Set tidb_analyze_version to 2. + tk.MustExec("set tidb_analyze_version = 2") + tk.MustQuery("select @@tidb_analyze_version").Check(testkit.Rows("2")) + + // Analyze table. It should analyze all columns because the v1 stats exists. + tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) +} From 5718b08b260dc9c2ecc87257a6ab9a60c9939bdc Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 18:26:56 +0800 Subject: [PATCH 06/18] chore: prepare bazel Signed-off-by: hi-rustin --- pkg/statistics/handle/usage/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/handle/usage/BUILD.bazel b/pkg/statistics/handle/usage/BUILD.bazel index b918b0bc8385b..7fd20d5d18844 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -40,7 +40,7 @@ go_test( ], embed = [":usage"], flaky = True, - shard_count = 3, + shard_count = 7, deps = [ "//pkg/infoschema", "//pkg/parser/model", From 84c048a284dae204077817d41450a3bf95f9c294 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 19:12:19 +0800 Subject: [PATCH 07/18] 
test: add TestAutoAnalyzeWithPredicateColumns Signed-off-by: hi-rustin --- pkg/statistics/handle/autoanalyze/BUILD.bazel | 2 +- .../handle/autoanalyze/autoanalyze_test.go | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/pkg/statistics/handle/autoanalyze/BUILD.bazel b/pkg/statistics/handle/autoanalyze/BUILD.bazel index a7fbee2be0bfd..09e38900978e5 100644 --- a/pkg/statistics/handle/autoanalyze/BUILD.bazel +++ b/pkg/statistics/handle/autoanalyze/BUILD.bazel @@ -35,7 +35,7 @@ go_test( timeout = "short", srcs = ["autoanalyze_test.go"], flaky = True, - shard_count = 12, + shard_count = 13, deps = [ ":autoanalyze", "//pkg/domain/infosync", diff --git a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go index 86e24739416d6..521c886dd4c55 100644 --- a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go +++ b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go @@ -90,6 +90,37 @@ func TestAutoAnalyzeLockedTable(t *testing.T) { require.True(t, dom.StatsHandle().HandleAutoAnalyze()) } +func TestAutoAnalyzeWithPredicateColumns(t *testing.T) { + // Create a table and add it to stats cache. + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int)") + tk.MustExec("insert into t values (1, 1)") + tk.MustQuery("select * from t where a > 0").Check(testkit.Rows("1 1")) + h := dom.StatsHandle() + err := h.HandleDDLEvent(<-h.DDLEventCh()) + require.NoError(t, err) + require.NoError(t, h.DumpStatsDeltaToKV(true)) + is := dom.InfoSchema() + require.NoError(t, h.Update(is)) + exec.AutoAnalyzeMinCnt = 0 + defer func() { + exec.AutoAnalyzeMinCnt = 1000 + }() + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Trigger auto analyze. + require.True(t, dom.StatsHandle().HandleAutoAnalyze()) + + // Check analyze jobs. 
+ tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t auto analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) +} + func TestDisableAutoAnalyze(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) From 14d47ea11aea89b98f63fbb3db8c176c89cfc9f3 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Wed, 26 Jun 2024 19:14:48 +0800 Subject: [PATCH 08/18] test: add TestAnalyzeWhenNoPredicateColumns Signed-off-by: hi-rustin --- pkg/statistics/handle/usage/BUILD.bazel | 2 +- .../handle/usage/predicate_column_test.go | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pkg/statistics/handle/usage/BUILD.bazel b/pkg/statistics/handle/usage/BUILD.bazel index 7fd20d5d18844..467fdf4fe5240 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -40,7 +40,7 @@ go_test( ], embed = [":usage"], flaky = True, - shard_count = 7, + shard_count = 8, deps = [ "//pkg/infoschema", "//pkg/parser/model", diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index fe8817e1e9021..8e0050a92df33 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -216,3 +216,27 @@ func TestAnalyzeTableWhenV1StatsExists(t *testing.T) { testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), ) } + +func TestAnalyzeWhenNoPredicateColumns(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Create table and select data without predicate. 
+ tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int)") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze table. + tk.MustExec("analyze table t") + // TODO: We should only analyze indexes here. + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + ) +} From f60ff65f3b55efd60decf9fdd9dc1d9b30370edd Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 15:28:19 +0800 Subject: [PATCH 09/18] fix: do not analyze all columns Signed-off-by: hi-rustin --- pkg/planner/core/planbuilder.go | 11 +++--- pkg/statistics/handle/usage/BUILD.bazel | 2 +- .../handle/usage/predicate_column_test.go | 36 +++++++++++++++++-- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index a84747b5e85c4..17217bf22261d 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -1977,10 +1977,13 @@ func (b *PlanBuilder) getPredicateColumns(tbl *ast.TableName, cols *calcOnceMap) return nil, err } if len(colList) == 0 { - b.ctx.GetSessionVars().StmtCtx.AppendWarning(errors.NewNoStackErrorf("No predicate column has been collected yet for table %s.%s so all columns are analyzed", tbl.Schema.L, tbl.Name.L)) - for _, colInfo := range tblInfo.Columns { - cols.data[colInfo.ID] = struct{}{} - } + b.ctx.GetSessionVars().StmtCtx.AppendWarning( + errors.NewNoStackErrorf( + "No predicate column has been collected yet for table %s.%s, so only indexes and the columns composing the indexes will be analyzed", + tbl.Schema.L, + tbl.Name.L, + ), + ) } else { for _, id := range colList { cols.data[id] = struct{}{} diff --git a/pkg/statistics/handle/usage/BUILD.bazel 
b/pkg/statistics/handle/usage/BUILD.bazel index 467fdf4fe5240..52bdddda32be5 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -40,7 +40,7 @@ go_test( ], embed = [":usage"], flaky = True, - shard_count = 8, + shard_count = 9, deps = [ "//pkg/infoschema", "//pkg/parser/model", diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index 8e0050a92df33..46f40cc9e9dea 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -217,7 +217,37 @@ func TestAnalyzeTableWhenV1StatsExists(t *testing.T) { ) } -func TestAnalyzeWhenNoPredicateColumns(t *testing.T) { +func TestAnalyzeWithNoPredicateColumns(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Create table and select data without predicate. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int, index ia (a, b))") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze table. + tk.MustExec("analyze table t") + // Show Warnings. 
+ tk.MustQuery("show warnings").Check( + testkit.Rows( + "Warning 1105 No predicate column has been collected yet for table test.t, so only indexes and the columns composing the indexes will be analyzed", + "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"", + ), + ) + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) +} + +func TestAnalyzeWithNoPredicateColumnsAndNoIndexes(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) @@ -235,8 +265,8 @@ func TestAnalyzeWhenNoPredicateColumns(t *testing.T) { // Analyze table. tk.MustExec("analyze table t") - // TODO: We should only analyze indexes here. + // FIXME: We should correct the job info or skip this kind of job. tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( - testkit.Rows("t analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + testkit.Rows("t analyze table columns with 256 buckets, 100 topn, 1 samplerate]"), ) } From 407687a72394fd6035c96177c8cfcc706c7f51cd Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 15:30:30 +0800 Subject: [PATCH 10/18] chore: add TODO Signed-off-by: hi-rustin --- pkg/statistics/handle/usage/predicate_column_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index 46f40cc9e9dea..24d0731ca3858 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -242,6 +242,7 @@ func TestAnalyzeWithNoPredicateColumns(t *testing.T) { "Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 
110000/10000) as the sample-rate=1\"", ), ) + // TODO: we should also include indexes in the job info. tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), ) From 323172ab59a8839e9c3c5b29834b02cf1e0ed696 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 15:40:32 +0800 Subject: [PATCH 11/18] test: fix broken test Signed-off-by: hi-rustin --- pkg/statistics/handle/autoanalyze/autoanalyze_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go index 521c886dd4c55..bf3636d37894d 100644 --- a/pkg/statistics/handle/autoanalyze/autoanalyze_test.go +++ b/pkg/statistics/handle/autoanalyze/autoanalyze_test.go @@ -101,6 +101,7 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) { h := dom.StatsHandle() err := h.HandleDDLEvent(<-h.DDLEventCh()) require.NoError(t, err) + require.NoError(t, h.DumpColStatsUsageToKV()) require.NoError(t, h.DumpStatsDeltaToKV(true)) is := dom.InfoSchema() require.NoError(t, h.Update(is)) @@ -109,6 +110,13 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) { exec.AutoAnalyzeMinCnt = 1000 }() + // Check column_stats_usage. + rows := tk.MustQuery( + "show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null", + ).Rows() + require.Equal(t, 1, len(rows)) + require.Equal(t, "a", rows[0][3]) + // Set tidb_analyze_column_options to PREDICATE. tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") @@ -117,7 +125,7 @@ func TestAutoAnalyzeWithPredicateColumns(t *testing.T) { // Check analyze jobs. 
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( - testkit.Rows("t auto analyze table all columns with 256 buckets, 100 topn, 1 samplerate"), + testkit.Rows("t auto analyze table columns a with 256 buckets, 100 topn, 1 samplerate"), ) } From be79b9332b20a3bc3215f5d11ae0867d9faaefb0 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 15:42:27 +0800 Subject: [PATCH 12/18] chore: remove useless code Signed-off-by: hi-rustin --- pkg/executor/test/analyzetest/analyze_test.go | 11 ----------- pkg/statistics/handle/updatetest/update_test.go | 2 -- 2 files changed, 13 deletions(-) diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go index 207ffa379b1a5..d87e5be7ca313 100644 --- a/pkg/executor/test/analyzetest/analyze_test.go +++ b/pkg/executor/test/analyzetest/analyze_test.go @@ -1040,7 +1040,6 @@ func TestSavedAnalyzeColumnOptions(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal4)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("use test") tk.MustExec("set @@session.tidb_analyze_version = 2") @@ -1151,7 +1150,6 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where a > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1219,7 +1217,6 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where c > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows 
:= tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1296,7 +1293,6 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where c > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1378,7 +1374,6 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where a < 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1502,7 +1497,6 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where a < 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1606,7 +1600,6 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where b > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1676,7 +1669,6 
@@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where b > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1759,7 +1751,6 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where b > 1") require.NoError(t, h.DumpColStatsUsageToKV()) rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_used_at is not null").Rows() @@ -1853,7 +1844,6 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where b > 1") require.NoError(t, dom.StatsHandle().DumpColStatsUsageToKV()) tk.MustExec("analyze table t predicate columns") @@ -2167,7 +2157,6 @@ func TestShowAanalyzeStatusJobInfo(t *testing.T) { } checkJobInfo("analyze table columns b, c, d with 2 buckets, 2 topn, 1 samplerate") tk.MustExec("set global tidb_persist_analyze_options = 1") - tk.MustExec("set global tidb_enable_column_tracking = 1") tk.MustExec("select * from t where c > 1") h := dom.StatsHandle() require.NoError(t, h.DumpColStatsUsageToKV()) diff --git a/pkg/statistics/handle/updatetest/update_test.go b/pkg/statistics/handle/updatetest/update_test.go index 2bdb484726bd8..27fe2544670e3 100644 --- a/pkg/statistics/handle/updatetest/update_test.go +++ b/pkg/statistics/handle/updatetest/update_test.go @@ -905,7 +905,6 @@ func 
TestDumpColumnStatsUsage(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") h := dom.StatsHandle() tk.MustExec("use test") @@ -987,7 +986,6 @@ func TestCollectPredicateColumnsFromExecute(t *testing.T) { defer func() { tk.MustExec(fmt.Sprintf("set global tidb_enable_column_tracking = %v", originalVal2)) }() - tk.MustExec("set global tidb_enable_column_tracking = 1") h := dom.StatsHandle() tk.MustExec("use test") From d31c2d4fcb0ed684471a701729634563aeda9555 Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 15:51:58 +0800 Subject: [PATCH 13/18] test: fix typo Signed-off-by: hi-rustin --- pkg/statistics/handle/usage/predicate_column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index 24d0731ca3858..42c8fb426d768 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -268,6 +268,6 @@ func TestAnalyzeWithNoPredicateColumnsAndNoIndexes(t *testing.T) { tk.MustExec("analyze table t") // FIXME: We should correct the job info or skip this kind of job. 
tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( - testkit.Rows("t analyze table columns with 256 buckets, 100 topn, 1 samplerate]"), + testkit.Rows("t analyze table columns with 256 buckets, 100 topn, 1 samplerate"), ) } From 6ae1a3ebcafaa8528006362071bbf70f00590a8b Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 16:19:34 +0800 Subject: [PATCH 14/18] test: add TestAnalyzeNoPredicateColumnsWithPrimaryKey Signed-off-by: hi-rustin --- .../handle/usage/predicate_column_test.go | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index 42c8fb426d768..cb39e7de4737d 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -217,7 +217,7 @@ func TestAnalyzeTableWhenV1StatsExists(t *testing.T) { ) } -func TestAnalyzeWithNoPredicateColumns(t *testing.T) { +func TestAnalyzeNoPredicateColumnsWithIndexes(t *testing.T) { store, dom := testkit.CreateMockStoreAndDomain(t) tk := testkit.NewTestKit(t, store) @@ -271,3 +271,26 @@ func TestAnalyzeWithNoPredicateColumnsAndNoIndexes(t *testing.T) { testkit.Rows("t analyze table columns with 256 buckets, 100 topn, 1 samplerate"), ) } + +func TestAnalyzeNoPredicateColumnsWithPrimaryKey(t *testing.T) { + store, dom := testkit.CreateMockStoreAndDomain(t) + tk := testkit.NewTestKit(t, store) + + // Set tidb_analyze_column_options to PREDICATE. + tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") + + // Create table and insert data without running any query that uses predicates. + tk.MustExec("use test") + tk.MustExec("create table t (a int, b int, c int, primary key (a, b))") + tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)") + // Dump the statistics usage. + h := dom.StatsHandle() + err := h.DumpColStatsUsageToKV() + require.NoError(t, err) + + // Analyze table.
+ tk.MustExec("analyze table t") + tk.MustQuery("select table_name, job_info from mysql.analyze_jobs order by id desc limit 1").Check( + testkit.Rows("t analyze table columns a, b with 256 buckets, 100 topn, 1 samplerate"), + ) +} From 04ccb5f029420fec4e6b0053a8c95163809bf33f Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 16:34:16 +0800 Subject: [PATCH 15/18] fix: make bazel happy Signed-off-by: hi-rustin --- pkg/statistics/handle/usage/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/handle/usage/BUILD.bazel b/pkg/statistics/handle/usage/BUILD.bazel index 52bdddda32be5..87819d8d29c21 100644 --- a/pkg/statistics/handle/usage/BUILD.bazel +++ b/pkg/statistics/handle/usage/BUILD.bazel @@ -40,7 +40,7 @@ go_test( ], embed = [":usage"], flaky = True, - shard_count = 9, + shard_count = 10, deps = [ "//pkg/infoschema", "//pkg/parser/model", From a1950b8ae210c354a68dbb2975195b5447c53aad Mon Sep 17 00:00:00 2001 From: hi-rustin Date: Thu, 27 Jun 2024 17:29:28 +0800 Subject: [PATCH 16/18] test: fix broken test Signed-off-by: hi-rustin --- pkg/executor/test/analyzetest/analyze_test.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/executor/test/analyzetest/analyze_test.go b/pkg/executor/test/analyzetest/analyze_test.go index d87e5be7ca313..04d98d3d25fd1 100644 --- a/pkg/executor/test/analyzetest/analyze_test.go +++ b/pkg/executor/test/analyzetest/analyze_test.go @@ -1826,10 +1826,8 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) { tk.MustExec("analyze table t predicate columns") tk.MustQuery("show warnings").Sort().Check(testkit.Rows( `Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`, - "Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed", + "Warning 1105 No predicate column has been collected yet for table test.t, so only indexes and 
the columns composing the indexes will be analyzed", )) - rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows() - require.Equal(t, 2, len(rows)) for _, val := range []model.ColumnChoice{model.ColumnList, model.PredicateColumns} { func(choice model.ColumnChoice) { From fcee8d6a9ee5e8119a9b091cb02482ee10c8c405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Fri, 28 Jun 2024 16:28:15 +0800 Subject: [PATCH 17/18] Update pkg/planner/core/planbuilder.go Co-authored-by: Arenatlx --- pkg/planner/core/planbuilder.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/planner/core/planbuilder.go b/pkg/planner/core/planbuilder.go index 17217bf22261d..4c52b980024a1 100644 --- a/pkg/planner/core/planbuilder.go +++ b/pkg/planner/core/planbuilder.go @@ -2039,7 +2039,7 @@ func (b *PlanBuilder) getFullAnalyzeColumnsInfo( return columns, nil, nil default: // Usually, this won't happen. 
- logutil.BgLogger().Warn("Unknown default column choice", zap.String("choice", columnOptions)) + logutil.BgLogger().Warn("Unknown default column choice, analyze all columns", zap.String("choice", columnOptions)) return tbl.TableInfo.Columns, nil, nil } case model.AllColumns: From 5a30f8764b86b1f20d82fd6809b74a3586d049da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=8C=E6=89=8B=E6=8E=89=E5=8C=85=E5=B7=A5=E7=A8=8B?= =?UTF-8?q?=E5=B8=88?= Date: Fri, 28 Jun 2024 16:29:06 +0800 Subject: [PATCH 18/18] Update pkg/statistics/handle/usage/predicate_column_test.go --- pkg/statistics/handle/usage/predicate_column_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/statistics/handle/usage/predicate_column_test.go b/pkg/statistics/handle/usage/predicate_column_test.go index cb39e7de4737d..a85e568395841 100644 --- a/pkg/statistics/handle/usage/predicate_column_test.go +++ b/pkg/statistics/handle/usage/predicate_column_test.go @@ -255,7 +255,6 @@ func TestAnalyzeWithNoPredicateColumnsAndNoIndexes(t *testing.T) { // Set tidb_analyze_column_options to PREDICATE. tk.MustExec("set global tidb_analyze_column_options='PREDICATE'") - // Create table and select data without predicate. tk.MustExec("use test") tk.MustExec("create table t (a int, b int, c int)") tk.MustExec("insert into t values (1, 1, 1), (2, 2, 2), (3, 3, 3)")