From 27a035e514d889c957db9556fdaa35ce27c07923 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Wed, 27 Dec 2023 11:10:57 +0800 Subject: [PATCH 1/3] This is an automated cherry-pick of #49808 Signed-off-by: ti-chi-bot --- pkg/statistics/BUILD.bazel | 4 ++++ pkg/statistics/builder.go | 4 ++++ pkg/statistics/cmsketch.go | 7 +++++++ pkg/statistics/cmsketch_test.go | 20 ++++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 6e1cdab915f94..da987dca56446 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -78,7 +78,11 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, +<<<<<<< HEAD shard_count = 33, +======= + shard_count = 35, +>>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)) deps = [ "//pkg/config", "//pkg/parser/ast", diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go index 363c023a579a9..d258fac257b71 100644 --- a/pkg/statistics/builder.go +++ b/pkg/statistics/builder.go @@ -375,6 +375,7 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } +<<<<<<< HEAD // For debugging invalid sample data. var ( foundTwice bool @@ -418,12 +419,15 @@ func BuildHistAndTopN( continue } } +======= +>>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)) } for i := 0; i < len(topNList); i++ { topNList[i].Count *= uint64(sampleFactor) } topn := &TopN{TopN: topNList} + topn.Scale(sampleFactor) if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) { // TopN includes all sample data diff --git a/pkg/statistics/cmsketch.go b/pkg/statistics/cmsketch.go index b5fe74411888a..c35450b8bdf6a 100644 --- a/pkg/statistics/cmsketch.go +++ b/pkg/statistics/cmsketch.go @@ -530,6 +530,13 @@ type TopN struct { TopN []TopNMeta } +// Scale scales the TopN by the given factor. +func (c *TopN) Scale(scaleFactor float64) { + for i := range c.TopN { + c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor) + } +} + // AppendTopN appends a topn into the TopN struct. func (c *TopN) AppendTopN(data []byte, count uint64) { if c == nil { diff --git a/pkg/statistics/cmsketch_test.go b/pkg/statistics/cmsketch_test.go index 7cbdfc62450d3..d2f43b2aef9a7 100644 --- a/pkg/statistics/cmsketch_test.go +++ b/pkg/statistics/cmsketch_test.go @@ -265,3 +265,23 @@ func TestSortTopnMeta(t *testing.T) { SortTopnMeta(data) require.Equal(t, uint64(2), data[0].Count) } + +func TestTopNScale(t *testing.T) { + for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} { + var data []TopNMeta + sumCount := uint64(0) + for i := 0; i < 20; i++ { + cnt := uint64(rand.Intn(100000)) + data = append(data, TopNMeta{ + Count: cnt, + }) + sumCount += cnt + } + topN := TopN{TopN: data} + topN.Scale(scaleFactor) + scaleCount := float64(sumCount) * scaleFactor + delta := math.Abs(float64(topN.TotalCount()) - scaleCount) + roundErrorRatio := delta / scaleCount + require.Less(t, roundErrorRatio, 0.0001) + } +} From dda16272720964937ae144ae7281463d92402429 Mon Sep 17 00:00:00 2001 From: qw4990 Date: Mon, 19 Feb 2024 21:15:27 +0800 Subject: [PATCH 2/3] fixup --- pkg/statistics/BUILD.bazel | 4 ---- pkg/statistics/builder.go | 6 ------ 2 files changed, 10 deletions(-) diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index da987dca56446..6e1cdab915f94 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -78,11 +78,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, -<<<<<<< HEAD shard_count = 33, -======= - shard_count = 35, ->>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)) deps = [ "//pkg/config", "//pkg/parser/ast", diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go index d258fac257b71..ac66888e9695f 100644 --- a/pkg/statistics/builder.go +++ b/pkg/statistics/builder.go @@ -375,7 +375,6 @@ func BuildHistAndTopN( if err != nil { return nil, nil, errors.Trace(err) } -<<<<<<< HEAD // For debugging invalid sample data. var ( foundTwice bool @@ -419,13 +418,8 @@ func BuildHistAndTopN( continue } } -======= ->>>>>>> 1fb5a9ae14a (planner: a better way to round scale factor when collecting TopN stats (#49808)) } - for i := 0; i < len(topNList); i++ { - topNList[i].Count *= uint64(sampleFactor) - } topn := &TopN{TopN: topNList} topn.Scale(sampleFactor) From 749a2faf5d3311e89541d79508d1993bb79355a0 Mon Sep 17 00:00:00 2001 From: qw4990 Date: Mon, 19 Feb 2024 21:18:27 +0800 Subject: [PATCH 3/3] fixup --- pkg/statistics/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 6e1cdab915f94..5c43f8191d883 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -78,7 +78,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 33, + shard_count = 34, deps = [ "//pkg/config", "//pkg/parser/ast",