Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: log the reason why the sample-rate is chosen when analyzing table (#45938) #45968

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion executor/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,8 @@ func finishJobWithLog(sctx sessionctx.Context, job *statistics.AnalyzeJob, analy
zap.String("job info", job.JobInfo),
zap.Time("start time", job.StartTime),
zap.Time("end time", job.EndTime),
zap.String("cost", job.EndTime.Sub(job.StartTime).String()))
zap.String("cost", job.EndTime.Sub(job.StartTime).String()),
zap.String("sample rate reason", job.SampleRateReason))
}
}

Expand Down
4 changes: 2 additions & 2 deletions executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ func TestMergeGlobalStatsWithUnAnalyzedPartition(t *testing.T) {
tk.MustExec("analyze table t partition p2 index idxc;")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Warning 1105 The version 2 would collect all statistics not only the selected indexes",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2"))
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\""))
tk.MustExec("analyze table t partition p0;")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0"))
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/2) as the sample-rate=1\""))
}
66 changes: 43 additions & 23 deletions executor/analyzetest/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,14 +668,14 @@ func TestAdjustSampleRateNote(t *testing.T) {
result := tk.MustQuery("show stats_meta where table_name = 't'")
require.Equal(t, "220000", result.Rows()[0][5])
tk.MustExec("analyze table t")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 0.500000 for table test.t, reason to use this rate is \"use min(1, 110000/220000) as the sample-rate=0.5\""))
tk.MustExec("insert into t values(1),(1),(1)")
require.NoError(t, statsHandle.DumpStatsDeltaToKV(handle.DumpAll))
require.NoError(t, statsHandle.Update(is))
result = tk.MustQuery("show stats_meta where table_name = 't'")
require.Equal(t, "3", result.Rows()[0][5])
tk.MustExec("analyze table t")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/3) as the sample-rate=1\""))
}

func TestFastAnalyze4GlobalStats(t *testing.T) {
Expand Down Expand Up @@ -924,7 +924,7 @@ func TestSmallTableAnalyzeV2(t *testing.T) {
tk.MustExec("create table small_table_inject_pd(a int)")
tk.MustExec("insert into small_table_inject_pd values(1), (2), (3), (4), (5)")
tk.MustExec("analyze table small_table_inject_pd")
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd"))
tk.MustQuery("show warnings").Check(testkit.Rows("Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\""))
tk.MustExec(`
create table small_table_inject_pd_with_partition(
a int
Expand All @@ -936,9 +936,9 @@ create table small_table_inject_pd_with_partition(
tk.MustExec("insert into small_table_inject_pd_with_partition values(1), (6), (11)")
tk.MustExec("analyze table small_table_inject_pd_with_partition")
tk.MustQuery("show warnings").Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.small_table_inject_pd_with_partition's partition p2, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
))
rows := [][]interface{}{
{"global", "a"},
Expand Down Expand Up @@ -1435,7 +1435,7 @@ func TestAnalyzeColumnsWithPrimaryKey(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1503,7 +1503,7 @@ func TestAnalyzeColumnsWithIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns c with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1580,7 +1580,7 @@ func TestAnalyzeColumnsWithClusteredIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns c with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b,d are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1661,8 +1661,8 @@ func TestAnalyzeColumnsWithDynamicPartitionTable(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1813,8 +1813,8 @@ func TestAnalyzeColumnsWithStaticPartitionTable(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns a with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1918,7 +1918,7 @@ func TestAnalyzeColumnsWithExtendedStats(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns b with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -1988,7 +1988,7 @@ func TestAnalyzeColumnsWithVirtualColumnIndex(t *testing.T) {
case model.ColumnList:
tk.MustExec("analyze table t columns b with 2 topn, 2 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns c are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
))
case model.PredicateColumns:
Expand Down Expand Up @@ -2110,6 +2110,26 @@ func TestAnalyzeColumnsAfterAnalyzeAll(t *testing.T) {
}
}

// TestAnalyzeSampleRateReason checks that the note produced by ANALYZE TABLE
// includes the reason for the auto-adjusted sample rate.
//
// Two rounds are exercised against the same table:
//  1. right after the table is created and the stats delta is dumped, and
//  2. after three rows are inserted and the stats delta is dumped again.
//
// Each round runs ANALYZE and compares the (sorted) output of SHOW WARNINGS
// with the single expected note.
func TestAnalyzeSampleRateReason(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Start from a fresh two-column table in the test schema.
	for _, setupSQL := range []string{
		"use test",
		"drop table if exists t",
		"create table t (a int, b int)",
	} {
		tk.MustExec(setupSQL)
	}

	// analyzeAndExpect runs ANALYZE on t and asserts that SHOW WARNINGS
	// yields exactly the given note.
	analyzeAndExpect := func(wantNote string) {
		tk.MustExec(`analyze table t`)
		warnings := tk.MustQuery(`show warnings`).Sort()
		warnings.Check(testkit.Rows(wantNote))
	}

	// Round 1: no rows have been inserted yet.
	require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll))
	analyzeAndExpect(`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`)

	// Round 2: three rows inserted, delta dumped again before analyzing.
	tk.MustExec(`insert into t values (1, 1), (2, 2), (3, 3)`)
	require.NoError(t, dom.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll))
	analyzeAndExpect(`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty, use sample-rate=1"`)
}

func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
store, dom := testkit.CreateMockStoreAndDomain(t)

Expand All @@ -2134,7 +2154,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
// If no predicate column is collected, analyze predicate columns gives a warning and falls back to analyze all columns.
tk.MustExec("analyze table t predicate columns")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "use min(1, 110000/10000) as the sample-rate=1"`,
"Warning 1105 No predicate column has been collected yet for table test.t so all columns are analyzed",
))
rows := tk.MustQuery("show column_stats_usage where db_name = 'test' and table_name = 't' and last_analyzed_at is not null").Rows()
Expand All @@ -2159,7 +2179,7 @@ func TestAnalyzeColumnsErrorAndWarning(t *testing.T) {
tk.MustExec("analyze table t predicate columns")
}
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
`Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is "TiDB assumes that the table is empty, use sample-rate=1"`,
"Warning 1105 Table test.t has version 1 statistics so all the columns must be analyzed to overwrite the current statistics",
))
}(val)
Expand Down Expand Up @@ -2741,7 +2761,7 @@ PARTITION BY RANGE ( a ) (
// analyze partition with options under dynamic mode
tk.MustExec("analyze table t partition p0 columns a,b,c with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
Expand All @@ -2755,7 +2775,7 @@ PARTITION BY RANGE ( a ) (

tk.MustExec("analyze table t partition p0")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p0, reason to use this rate is \"use min(1, 110000/9) as the sample-rate=1\"",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
"Warning 8131 Build global-level stats failed due to missing partition-level stats: table `t` partition `p1`",
))
Expand Down Expand Up @@ -2810,7 +2830,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic'")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))

Expand All @@ -2819,7 +2839,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("set global tidb_persist_analyze_options = true")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))
Expand All @@ -2828,7 +2848,7 @@ PARTITION BY RANGE ( a ) (
tk.MustExec("insert into mysql.analyze_options values (?,?,?,?,?,?,?)", pi.Definitions[1].ID, 0, 0, 1, 1, "DEFAULT", "")
tk.MustExec("analyze table t partition p1 columns a,b,d with 1 topn, 3 buckets")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t's partition p1, reason to use this rate is \"use min(1, 110000/5) as the sample-rate=1\"",
"Warning 1105 Ignore columns and options when analyze partition in dynamic mode",
"Warning 8244 Build global-level stats failed due to missing partition-level column stats: table `t` partition `p0` column `d`, please run analyze table to refresh columns of all partitions",
))
Expand Down Expand Up @@ -3233,7 +3253,7 @@ func TestAnalyzeColumnsSkipMVIndexJsonCol(t *testing.T) {

tk.MustExec("analyze table t columns a")
tk.MustQuery("show warnings").Sort().Check(testkit.Rows(""+
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t",
"Note 1105 Analyze use auto adjusted sample rate 1.000000 for table test.t, reason to use this rate is \"use min(1, 110000/10000) as the sample-rate=1\"",
"Warning 1105 Columns b are missing in ANALYZE but their stats are needed for calculating stats for indexes/primary key/extended stats",
"Warning 1105 analyzing multi-valued indexes is not supported, skip idx_c"))
tk.MustQuery("select job_info from mysql.analyze_jobs where table_schema = 'test' and table_name = 't'").Check(testkit.Rows(
Expand Down
Loading