Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: add some test cases of global-stats to cover more column types #23138

Merged
merged 7 commits into from
Mar 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 260 additions & 0 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,266 @@ func (s *testStatsSuite) TestGlobalStatsData2(c *C) {
testkit.Rows("12 1", // global, g = p0 + p1
"5 1", // p0
"7 0")) // p1

// double + (column + index with 1 column)
tk.MustExec("drop table if exists tdouble")
tk.MustExec(`create table tdouble (a int, c double, key(c)) partition by range (a)` +
`(partition p0 values less than(10),partition p1 values less than(20))`)
tk.MustExec(`insert into tdouble values ` +
`(1, 1), (2, 2), (3, 3), (4, 4), (4, 4), (5, 5), (5, 5), (5, 5), (null, null), ` + // values in p0
`(11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (16, 16), (16, 16), (16, 16), (17, 17), (17, 17)`) // values in p1
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table tdouble with 2 topn, 2 buckets")

rs := tk.MustQuery("show stats_meta where table_name='tdouble'").Rows()
c.Assert(rs[0][5].(string), Equals, "20") // g.count = p0.count + p1.count
c.Assert(rs[1][5].(string), Equals, "9") // p0.count
c.Assert(rs[2][5].(string), Equals, "11") // p1.count

tk.MustQuery("show stats_topn where table_name='tdouble' and is_index=0 and column_name='c'").Check(testkit.Rows(
`test tdouble global c 0 5 3`,
`test tdouble global c 0 16 4`,
`test tdouble p0 c 0 4 2`,
`test tdouble p0 c 0 5 3`,
`test tdouble p1 c 0 16 4`,
`test tdouble p1 c 0 17 2`))

tk.MustQuery("show stats_topn where table_name='tdouble' and is_index=1 and column_name='c'").Check(testkit.Rows(
`test tdouble global c 1 5 3`,
`test tdouble global c 1 16 4`,
`test tdouble p0 c 1 4 2`,
`test tdouble p0 c 1 5 3`,
`test tdouble p1 c 1 16 4`,
`test tdouble p1 c 1 17 2`))

tk.MustQuery("show stats_buckets where table_name='tdouble' and is_index=0 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdouble global c 0 0 5 2 1 4 0", // bucket.ndv is not maintained for column histograms
"test tdouble global c 0 1 12 2 4 17 0",
"test tdouble p0 c 0 0 2 1 1 2 0",
"test tdouble p0 c 0 1 3 1 3 3 0",
"test tdouble p1 c 0 0 3 1 11 13 0",
"test tdouble p1 c 0 1 5 1 14 15 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdouble' and column_name='c' and is_index=0").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

tk.MustQuery("show stats_buckets where table_name='tdouble' and is_index=1 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdouble global c 1 0 5 0 1 5 4", // 4 is popped from p0.TopN, so g.ndv = p0.ndv+1
"test tdouble global c 1 1 12 2 5 17 6",
"test tdouble p0 c 1 0 3 0 1 4 3",
"test tdouble p0 c 1 1 3 0 5 5 0",
"test tdouble p1 c 1 0 5 0 11 16 5",
"test tdouble p1 c 1 1 5 0 17 17 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdouble' and column_name='c' and is_index=1").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

// decimal + (column + index with 1 column)
tk.MustExec("drop table if exists tdecimal")
tk.MustExec(`create table tdecimal (a int, c decimal(10, 2), key(c)) partition by range (a)` +
`(partition p0 values less than(10),partition p1 values less than(20))`)
tk.MustExec(`insert into tdecimal values ` +
`(1, 1), (2, 2), (3, 3), (4, 4), (4, 4), (5, 5), (5, 5), (5, 5), (null, null), ` + // values in p0
`(11, 11), (12, 12), (13, 13), (14, 14), (15, 15), (16, 16), (16, 16), (16, 16), (16, 16), (17, 17), (17, 17)`) // values in p1
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table tdecimal with 2 topn, 2 buckets")

rs = tk.MustQuery("show stats_meta where table_name='tdecimal'").Rows()
c.Assert(rs[0][5].(string), Equals, "20") // g.count = p0.count + p1.count
c.Assert(rs[1][5].(string), Equals, "9") // p0.count
c.Assert(rs[2][5].(string), Equals, "11") // p1.count

tk.MustQuery("show stats_topn where table_name='tdecimal' and is_index=0 and column_name='c'").Check(testkit.Rows(
`test tdecimal global c 0 5.00 3`,
`test tdecimal global c 0 16.00 4`,
`test tdecimal p0 c 0 4.00 2`,
`test tdecimal p0 c 0 5.00 3`,
`test tdecimal p1 c 0 16.00 4`,
`test tdecimal p1 c 0 17.00 2`))

tk.MustQuery("show stats_topn where table_name='tdecimal' and is_index=1 and column_name='c'").Check(testkit.Rows(
`test tdecimal global c 1 5.00 3`,
`test tdecimal global c 1 16.00 4`,
`test tdecimal p0 c 1 4.00 2`,
`test tdecimal p0 c 1 5.00 3`,
`test tdecimal p1 c 1 16.00 4`,
`test tdecimal p1 c 1 17.00 2`))

tk.MustQuery("show stats_buckets where table_name='tdecimal' and is_index=0 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdecimal global c 0 0 5 2 1.00 4.00 0", // bucket.ndv is not maintained for column histograms
"test tdecimal global c 0 1 12 2 4.00 17.00 0",
"test tdecimal p0 c 0 0 2 1 1.00 2.00 0",
"test tdecimal p0 c 0 1 3 1 3.00 3.00 0",
"test tdecimal p1 c 0 0 3 1 11.00 13.00 0",
"test tdecimal p1 c 0 1 5 1 14.00 15.00 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdecimal' and column_name='c' and is_index=0").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

tk.MustQuery("show stats_buckets where table_name='tdecimal' and is_index=1 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdecimal global c 1 0 5 0 1.00 5.00 4", // 4 is popped from p0.TopN, so g.ndv = p0.ndv+1
"test tdecimal global c 1 1 12 2 5.00 17.00 6",
"test tdecimal p0 c 1 0 3 0 1.00 4.00 3",
"test tdecimal p0 c 1 1 3 0 5.00 5.00 0",
"test tdecimal p1 c 1 0 5 0 11.00 16.00 5",
"test tdecimal p1 c 1 1 5 0 17.00 17.00 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdecimal' and column_name='c' and is_index=1").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

// datetime + (column + index with 1 column)
tk.MustExec("drop table if exists tdatetime")
tk.MustExec(`create table tdatetime (a int, c datetime, key(c)) partition by range (a)` +
`(partition p0 values less than(10),partition p1 values less than(20))`)
tk.MustExec(`insert into tdatetime values ` +
`(1, '2000-01-01'), (2, '2000-01-02'), (3, '2000-01-03'), (4, '2000-01-04'), (4, '2000-01-04'), (5, '2000-01-05'), (5, '2000-01-05'), (5, '2000-01-05'), (null, null), ` + // values in p0
`(11, '2000-01-11'), (12, '2000-01-12'), (13, '2000-01-13'), (14, '2000-01-14'), (15, '2000-01-15'), (16, '2000-01-16'), (16, '2000-01-16'), (16, '2000-01-16'), (16, '2000-01-16'), (17, '2000-01-17'), (17, '2000-01-17')`) // values in p1
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table tdatetime with 2 topn, 2 buckets")

rs = tk.MustQuery("show stats_meta where table_name='tdatetime'").Rows()
c.Assert(rs[0][5].(string), Equals, "20") // g.count = p0.count + p1.count
c.Assert(rs[1][5].(string), Equals, "9") // p0.count
c.Assert(rs[2][5].(string), Equals, "11") // p1.count

tk.MustQuery("show stats_topn where table_name='tdatetime' and is_index=0 and column_name='c'").Check(testkit.Rows(
`test tdatetime global c 0 2000-01-05 00:00:00 3`,
`test tdatetime global c 0 2000-01-16 00:00:00 4`,
`test tdatetime p0 c 0 2000-01-04 00:00:00 2`,
`test tdatetime p0 c 0 2000-01-05 00:00:00 3`,
`test tdatetime p1 c 0 2000-01-16 00:00:00 4`,
`test tdatetime p1 c 0 2000-01-17 00:00:00 2`))

tk.MustQuery("show stats_topn where table_name='tdatetime' and is_index=1 and column_name='c'").Check(testkit.Rows(
`test tdatetime global c 1 2000-01-05 00:00:00 3`,
`test tdatetime global c 1 2000-01-16 00:00:00 4`,
`test tdatetime p0 c 1 2000-01-04 00:00:00 2`,
`test tdatetime p0 c 1 2000-01-05 00:00:00 3`,
`test tdatetime p1 c 1 2000-01-16 00:00:00 4`,
`test tdatetime p1 c 1 2000-01-17 00:00:00 2`))

tk.MustQuery("show stats_buckets where table_name='tdatetime' and is_index=0 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdatetime global c 0 0 5 2 2000-01-01 00:00:00 2000-01-04 00:00:00 0", // bucket.ndv is not maintained for column histograms
"test tdatetime global c 0 1 12 2 2000-01-04 00:00:00 2000-01-17 00:00:00 0",
"test tdatetime p0 c 0 0 2 1 2000-01-01 00:00:00 2000-01-02 00:00:00 0",
"test tdatetime p0 c 0 1 3 1 2000-01-03 00:00:00 2000-01-03 00:00:00 0",
"test tdatetime p1 c 0 0 3 1 2000-01-11 00:00:00 2000-01-13 00:00:00 0",
"test tdatetime p1 c 0 1 5 1 2000-01-14 00:00:00 2000-01-15 00:00:00 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdatetime' and column_name='c' and is_index=0").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

tk.MustQuery("show stats_buckets where table_name='tdatetime' and is_index=1 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tdatetime global c 1 0 5 0 2000-01-01 00:00:00 2000-01-05 00:00:00 4", // 4 is popped from p0.TopN, so g.ndv = p0.ndv+1
"test tdatetime global c 1 1 12 2 2000-01-05 00:00:00 2000-01-17 00:00:00 6",
"test tdatetime p0 c 1 0 3 0 2000-01-01 00:00:00 2000-01-04 00:00:00 3",
"test tdatetime p0 c 1 1 3 0 2000-01-05 00:00:00 2000-01-05 00:00:00 0",
"test tdatetime p1 c 1 0 5 0 2000-01-11 00:00:00 2000-01-16 00:00:00 5",
"test tdatetime p1 c 1 1 5 0 2000-01-17 00:00:00 2000-01-17 00:00:00 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tdatetime' and column_name='c' and is_index=1").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

// string + (column + index with 1 column)
tk.MustExec("drop table if exists tstring")
tk.MustExec(`create table tstring (a int, c varchar(32), key(c)) partition by range (a)` +
`(partition p0 values less than(10),partition p1 values less than(20))`)
tk.MustExec(`insert into tstring values ` +
`(1, 'a1'), (2, 'a2'), (3, 'a3'), (4, 'a4'), (4, 'a4'), (5, 'a5'), (5, 'a5'), (5, 'a5'), (null, null), ` + // values in p0
`(11, 'b11'), (12, 'b12'), (13, 'b13'), (14, 'b14'), (15, 'b15'), (16, 'b16'), (16, 'b16'), (16, 'b16'), (16, 'b16'), (17, 'b17'), (17, 'b17')`) // values in p1
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table tstring with 2 topn, 2 buckets")

rs = tk.MustQuery("show stats_meta where table_name='tstring'").Rows()
c.Assert(rs[0][5].(string), Equals, "20") // g.count = p0.count + p1.count
c.Assert(rs[1][5].(string), Equals, "9") // p0.count
c.Assert(rs[2][5].(string), Equals, "11") // p1.count

tk.MustQuery("show stats_topn where table_name='tstring' and is_index=0 and column_name='c'").Check(testkit.Rows(
`test tstring global c 0 a5 3`,
`test tstring global c 0 b16 4`,
`test tstring p0 c 0 a4 2`,
`test tstring p0 c 0 a5 3`,
`test tstring p1 c 0 b16 4`,
`test tstring p1 c 0 b17 2`))

tk.MustQuery("show stats_topn where table_name='tstring' and is_index=1 and column_name='c'").Check(testkit.Rows(
`test tstring global c 1 a5 3`,
`test tstring global c 1 b16 4`,
`test tstring p0 c 1 a4 2`,
`test tstring p0 c 1 a5 3`,
`test tstring p1 c 1 b16 4`,
`test tstring p1 c 1 b17 2`))

tk.MustQuery("show stats_buckets where table_name='tstring' and is_index=0 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tstring global c 0 0 5 2 a1 a4 0", // bucket.ndv is not maintained for column histograms
"test tstring global c 0 1 12 2 a4 b17 0",
"test tstring p0 c 0 0 2 1 a1 a2 0",
"test tstring p0 c 0 1 3 1 a3 a3 0",
"test tstring p1 c 0 0 3 1 b11 b13 0",
"test tstring p1 c 0 1 5 1 b14 b15 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tstring' and column_name='c' and is_index=0").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")

tk.MustQuery("show stats_buckets where table_name='tstring' and is_index=1 and column_name='c'").Check(testkit.Rows(
// db, tbl, part, col, isIdx, bucketID, count, repeat, lower, upper, ndv
"test tstring global c 1 0 5 0 a1 a5 4", // 4 is popped from p0.TopN, so g.ndv = p0.ndv+1
"test tstring global c 1 1 12 2 a5 b17 6",
"test tstring p0 c 1 0 3 0 a1 a4 3",
"test tstring p0 c 1 1 3 0 a5 a5 0",
"test tstring p1 c 1 0 5 0 b11 b16 5",
"test tstring p1 c 1 1 5 0 b17 b17 0"))

rs = tk.MustQuery("show stats_histograms where table_name='tstring' and column_name='c' and is_index=1").Rows()
c.Assert(rs[0][6].(string), Equals, "12") // g.ndv = p0 + p1
c.Assert(rs[1][6].(string), Equals, "5")
c.Assert(rs[2][6].(string), Equals, "7")
c.Assert(rs[0][7].(string), Equals, "1") // g.null_count = p0 + p1
c.Assert(rs[1][7].(string), Equals, "1")
c.Assert(rs[2][7].(string), Equals, "0")
}

func (s *testStatsSuite) TestGlobalStatsVersion(c *C) {
Expand Down
7 changes: 6 additions & 1 deletion statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1831,7 +1831,12 @@ func MergePartitionHist2GlobalHist(sc *stmtctx.StatementContext, hists []*Histog
d.SetBytes(meta.Encoded)
} else {
var err error
_, d, err = codec.DecodeOne(meta.Encoded)
if types.IsTypeTime(hists[0].Tp.Tp) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we need to add some comments here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some comments have been added, PTAL @rebelice.

// handle datetime values specially since they are encoded to int and we'll get int values if using DecodeOne.
_, d, err = codec.DecodeAsDateTime(meta.Encoded, hists[0].Tp.Tp, sc.TimeZone)
} else {
_, d, err = codec.DecodeOne(meta.Encoded)
}
if err != nil {
return nil, err
}
Expand Down
3 changes: 2 additions & 1 deletion util/codec/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -748,7 +748,8 @@ func DecodeRange(b []byte, size int, idxColumnTypes []byte, loc *time.Location)
if i >= len(idxColumnTypes) {
return values, b, errors.New("invalid length of index's columns")
}
if idxColumnTypes[i] == mysql.TypeDatetime || idxColumnTypes[i] == mysql.TypeTimestamp || idxColumnTypes[i] == mysql.TypeDate {
if types.IsTypeTime(idxColumnTypes[i]) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto: maybe we need to add some comments here as well.

// handle datetime values specially since they are encoded to int and we'll get int values if using DecodeOne.
b, d, err = DecodeAsDateTime(b, idxColumnTypes[i], loc)
} else {
b, d, err = DecodeOne(b)
Expand Down