From 114d58ca98cd65b742a39af53e77a7bee180795b Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 27 Dec 2023 11:10:57 +0800 Subject: [PATCH 1/2] This is an automated cherry-pick of #49808 Signed-off-by: ti-chi-bot --- pkg/statistics/BUILD.bazel | 108 ++++++++++++++++++++++++++++++++++++ statistics/builder.go | 4 ++ statistics/cmsketch.go | 7 +++ statistics/cmsketch_test.go | 20 +++++++ 4 files changed, 139 insertions(+) create mode 100644 pkg/statistics/BUILD.bazel diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel new file mode 100644 index 0000000000000..6997f6ceb6d1e --- /dev/null +++ b/pkg/statistics/BUILD.bazel @@ -0,0 +1,108 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "statistics", + srcs = [ + "analyze.go", + "analyze_jobs.go", + "builder.go", + "builder_ext_stats.go", + "cmsketch.go", + "cmsketch_util.go", + "column.go", + "debugtrace.go", + "estimate.go", + "fmsketch.go", + "histogram.go", + "index.go", + "row_sampler.go", + "sample.go", + "scalar.go", + "table.go", + ], + importpath = "github.com/pingcap/tidb/pkg/statistics", + visibility = ["//visibility:public"], + deps = [ + "//pkg/expression", + "//pkg/kv", + "//pkg/parser/ast", + "//pkg/parser/charset", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/parser/terror", + "//pkg/planner/util/debugtrace", + "//pkg/sessionctx", + "//pkg/sessionctx/stmtctx", + "//pkg/sessionctx/variable", + "//pkg/statistics/handle/logutil", + "//pkg/tablecodec", + "//pkg/types", + "//pkg/util/chunk", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/dbterror", + "//pkg/util/fastrand", + "//pkg/util/hack", + "//pkg/util/intest", + "//pkg/util/logutil", + "//pkg/util/memory", + "//pkg/util/ranger", + "//pkg/util/sqlexec", + "@com_github_dolthub_swiss//:swiss", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_pingcap_tipb//go-tipb", + "@com_github_twmb_murmur3//:murmur3", + "@org_golang_x_exp//maps", + "@org_uber_go_atomic//:atomic", + "@org_uber_go_zap//:zap", + ], +) + +go_test( + name = "statistics_test", + timeout = "short", + srcs = [ + "bench_daily_test.go", + "builder_test.go", + "cmsketch_test.go", + "fmsketch_test.go", + "histogram_bench_test.go", + "histogram_test.go", + "integration_test.go", + "main_test.go", + "sample_test.go", + "scalar_test.go", + "statistics_test.go", + ], + data = glob(["testdata/**"]), + embed = [":statistics"], + flaky = True, + shard_count = 35, + deps = [ + "//pkg/config", + "//pkg/parser/ast", + "//pkg/parser/model", + "//pkg/parser/mysql", + "//pkg/sessionctx", + "//pkg/sessionctx/stmtctx", + "//pkg/statistics/handle/autoanalyze", + "//pkg/testkit", + "//pkg/testkit/testdata", + "//pkg/testkit/testmain", + "//pkg/testkit/testsetup", + "//pkg/types", + "//pkg/util/benchdaily", + "//pkg/util/chunk", + "//pkg/util/codec", + "//pkg/util/collate", + "//pkg/util/memory", + "//pkg/util/mock", + "//pkg/util/ranger", + "//pkg/util/sqlexec", + "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", + "@com_github_stretchr_testify//require", + "@org_uber_go_goleak//:goleak", + ], +) diff --git a/statistics/builder.go b/statistics/builder.go index ec116803e952d..ffd5e798a37d5 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -374,6 +374,7 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } +<<<<<<< HEAD:statistics/builder.go // For debugging invalid sample data. var ( foundTwice bool @@ -417,12 +418,15 @@ func BuildHistAndTopN( continue } } +======= +>>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)):pkg/statistics/builder.go } for i := 0; i < len(topNList); i++ { topNList[i].Count *= uint64(sampleFactor) } topn := &TopN{TopN: topNList} + topn.Scale(sampleFactor) if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) { // TopN includes all sample data diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 9406d9eb7a5b2..0757e7a0a3d4d 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -495,6 +495,13 @@ type TopN struct { TopN []TopNMeta } +// Scale scales the TopN by the given factor. +func (c *TopN) Scale(scaleFactor float64) { + for i := range c.TopN { + c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor) + } +} + // AppendTopN appends a topn into the TopN struct. func (c *TopN) AppendTopN(data []byte, count uint64) { if c == nil { diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go index 1585342d8826b..8f9bccf3cfb56 100644 --- a/statistics/cmsketch_test.go +++ b/statistics/cmsketch_test.go @@ -390,3 +390,23 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) { require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55") require.Len(t, leftTopN, 1, "should have 1 left topN") } + +func TestTopNScale(t *testing.T) { + for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} { + var data []TopNMeta + sumCount := uint64(0) + for i := 0; i < 20; i++ { + cnt := uint64(rand.Intn(100000)) + data = append(data, TopNMeta{ + Count: cnt, + }) + sumCount += cnt + } + topN := TopN{TopN: data} + topN.Scale(scaleFactor) + scaleCount := float64(sumCount) * scaleFactor + delta := math.Abs(float64(topN.TotalCount()) - scaleCount) + roundErrorRatio := delta / scaleCount + require.Less(t, roundErrorRatio, 0.0001) + } +} From 4a64ab1390d28fc79c4015112bb15434559c3e0e Mon Sep 17 00:00:00 2001 From: qw4990 Date: Thu, 28 Dec 2023 11:06:12 +0800 Subject: [PATCH 2/2] fixup --- pkg/statistics/BUILD.bazel | 108 ------------------------------------- statistics/builder.go | 6 --- 2 files changed, 114 deletions(-) delete mode 100644 pkg/statistics/BUILD.bazel diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel deleted file mode 100644 index 6997f6ceb6d1e..0000000000000 --- a/pkg/statistics/BUILD.bazel +++ /dev/null @@ -1,108 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") - -go_library( - name = "statistics", - srcs = [ - "analyze.go", - "analyze_jobs.go", - "builder.go", - "builder_ext_stats.go", - "cmsketch.go", - "cmsketch_util.go", - "column.go", - "debugtrace.go", - "estimate.go", - "fmsketch.go", - "histogram.go", - "index.go", - "row_sampler.go", - "sample.go", - "scalar.go", - "table.go", - ], - importpath = "github.com/pingcap/tidb/pkg/statistics", - visibility = ["//visibility:public"], - deps = [ - "//pkg/expression", - "//pkg/kv", - "//pkg/parser/ast", - "//pkg/parser/charset", - "//pkg/parser/model", - "//pkg/parser/mysql", - "//pkg/parser/terror", - "//pkg/planner/util/debugtrace", - "//pkg/sessionctx", - "//pkg/sessionctx/stmtctx", - "//pkg/sessionctx/variable", - "//pkg/statistics/handle/logutil", - "//pkg/tablecodec", - "//pkg/types", - "//pkg/util/chunk", - "//pkg/util/codec", - "//pkg/util/collate", - "//pkg/util/dbterror", - "//pkg/util/fastrand", - "//pkg/util/hack", - "//pkg/util/intest", - "//pkg/util/logutil", - "//pkg/util/memory", - "//pkg/util/ranger", - "//pkg/util/sqlexec", - "@com_github_dolthub_swiss//:swiss", - "@com_github_pingcap_errors//:errors", - "@com_github_pingcap_failpoint//:failpoint", - "@com_github_pingcap_tipb//go-tipb", - "@com_github_twmb_murmur3//:murmur3", - "@org_golang_x_exp//maps", - "@org_uber_go_atomic//:atomic", - "@org_uber_go_zap//:zap", - ], -) - -go_test( - name = "statistics_test", - timeout = "short", - srcs = [ - "bench_daily_test.go", - "builder_test.go", - "cmsketch_test.go", - "fmsketch_test.go", - "histogram_bench_test.go", - "histogram_test.go", - "integration_test.go", - "main_test.go", - "sample_test.go", - "scalar_test.go", - "statistics_test.go", - ], - data = glob(["testdata/**"]), - embed = [":statistics"], - flaky = True, - shard_count = 35, - deps = [ - "//pkg/config", - "//pkg/parser/ast", - "//pkg/parser/model", - "//pkg/parser/mysql", - "//pkg/sessionctx", - "//pkg/sessionctx/stmtctx", - "//pkg/statistics/handle/autoanalyze", - "//pkg/testkit", - "//pkg/testkit/testdata", - "//pkg/testkit/testmain", - "//pkg/testkit/testsetup", - "//pkg/types", - "//pkg/util/benchdaily", - "//pkg/util/chunk", - "//pkg/util/codec", - "//pkg/util/collate", - "//pkg/util/memory", - "//pkg/util/mock", - "//pkg/util/ranger", - "//pkg/util/sqlexec", - "@com_github_pingcap_errors//:errors", - "@com_github_pingcap_failpoint//:failpoint", - "@com_github_stretchr_testify//require", - "@org_uber_go_goleak//:goleak", - ], -) diff --git a/statistics/builder.go b/statistics/builder.go index ffd5e798a37d5..22946dd922db6 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -374,7 +374,6 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } -<<<<<<< HEAD:statistics/builder.go // For debugging invalid sample data. var ( foundTwice bool @@ -418,13 +417,8 @@ func BuildHistAndTopN( continue } } -======= ->>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)):pkg/statistics/builder.go } - for i := 0; i < len(topNList); i++ { - topNList[i].Count *= uint64(sampleFactor) - } topn := &TopN{TopN: topNList} topn.Scale(sampleFactor)