diff --git a/pkg/statistics/BUILD.bazel b/pkg/statistics/BUILD.bazel index 6e1cdab915f94..5c43f8191d883 100644 --- a/pkg/statistics/BUILD.bazel +++ b/pkg/statistics/BUILD.bazel @@ -78,7 +78,7 @@ go_test( data = glob(["testdata/**"]), embed = [":statistics"], flaky = True, - shard_count = 33, + shard_count = 34, deps = [ "//pkg/config", "//pkg/parser/ast", diff --git a/pkg/statistics/builder.go b/pkg/statistics/builder.go index 363c023a579a9..ac66888e9695f 100644 --- a/pkg/statistics/builder.go +++ b/pkg/statistics/builder.go @@ -420,10 +420,8 @@ func BuildHistAndTopN( } } - for i := 0; i < len(topNList); i++ { - topNList[i].Count *= uint64(sampleFactor) - } topn := &TopN{TopN: topNList} + topn.Scale(sampleFactor) if uint64(count) <= topn.TotalCount() || int(hg.NDV) <= len(topn.TopN) { // TopN includes all sample data diff --git a/pkg/statistics/cmsketch.go b/pkg/statistics/cmsketch.go index b5fe74411888a..c35450b8bdf6a 100644 --- a/pkg/statistics/cmsketch.go +++ b/pkg/statistics/cmsketch.go @@ -530,6 +530,13 @@ type TopN struct { TopN []TopNMeta } +// Scale scales the TopN by the given factor. +func (c *TopN) Scale(scaleFactor float64) { + for i := range c.TopN { + c.TopN[i].Count = uint64(float64(c.TopN[i].Count) * scaleFactor) + } +} + // AppendTopN appends a topn into the TopN struct. func (c *TopN) AppendTopN(data []byte, count uint64) { if c == nil { diff --git a/pkg/statistics/cmsketch_test.go b/pkg/statistics/cmsketch_test.go index 7cbdfc62450d3..d2f43b2aef9a7 100644 --- a/pkg/statistics/cmsketch_test.go +++ b/pkg/statistics/cmsketch_test.go @@ -265,3 +265,23 @@ func TestSortTopnMeta(t *testing.T) { SortTopnMeta(data) require.Equal(t, uint64(2), data[0].Count) } + +func TestTopNScale(t *testing.T) { + for _, scaleFactor := range []float64{0.9999, 1.00001, 1.9999, 4.9999, 5.001, 9.99} { + var data []TopNMeta + sumCount := uint64(0) + for i := 0; i < 20; i++ { + cnt := uint64(rand.Intn(100000)) + data = append(data, TopNMeta{ + Count: cnt, + }) + sumCount += cnt + } + topN := TopN{TopN: data} + topN.Scale(scaleFactor) + scaleCount := float64(sumCount) * scaleFactor + delta := math.Abs(float64(topN.TotalCount()) - scaleCount) + roundErrorRatio := delta / scaleCount + require.Less(t, roundErrorRatio, 0.0001) + } +}