From f2927a56dbdb7d169081a979c8310c7afa085800 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Tue, 27 Feb 2024 19:41:14 +0800 Subject: [PATCH] planner: a better way to round scale factor when collecting TopN stats (#49808) (#49820) close pingcap/tidb#49801 --- statistics/builder.go | 4 +--- statistics/cmsketch.go | 7 +++++++ statistics/cmsketch_test.go | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/statistics/builder.go b/statistics/builder.go index ec116803e952d..22946dd922db6 100644 --- a/statistics/builder.go +++ b/statistics/builder.go @@ -419,10 +419,8 @@ func BuildHistAndTopN( } } - for i := 0; i < len(topNList); i++ { - topNList[i].Count *= uint64(sampleFactor) - } topn := &TopN{TopN: topNList} + topn.Scale(sampleFactor) if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) { // TopN includes all sample data diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 43b29e7226a0e..829a824981f8f 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -529,6 +529,13 @@ type TopN struct { TopN []TopNMeta } +// Scale scales the TopN by the given factor. +func (c *TopN) Scale(scaleFactor float64) { + for i := range c.TopN { + c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor) + } +} + // AppendTopN appends a topn into the TopN struct. func (c *TopN) AppendTopN(data []byte, count uint64) { if c == nil { diff --git a/statistics/cmsketch_test.go b/statistics/cmsketch_test.go index bed7181d3fd70..044dc0fcf163d 100644 --- a/statistics/cmsketch_test.go +++ b/statistics/cmsketch_test.go @@ -389,3 +389,23 @@ func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) { require.Equal(t, uint64(55), globalTopN.TotalCount(), "should have 55") require.Len(t, leftTopN, 1, "should have 1 left topN") } + +func TestTopNScale(t *testing.T) { + for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} { + var data []TopNMeta + sumCount := uint64(0) + for i := 0; i < 20; i++ { + cnt := uint64(rand.Intn(100000)) + data = append(data, TopNMeta{ + Count: cnt, + }) + sumCount += cnt + } + topN := TopN{TopN: data} + topN.Scale(scaleFactor) + scaleCount := float64(sumCount) * scaleFactor + delta := math.Abs(float64(topN.TotalCount()) - scaleCount) + roundErrorRatio := delta / scaleCount + require.Less(t, roundErrorRatio, 0.0001) + } +}