From 8534bd8fb6de89cd8f762e1dcbfb729b4414fe9f Mon Sep 17 00:00:00 2001 From: yisaer Date: Mon, 13 Jun 2022 18:19:52 +0800 Subject: [PATCH 1/2] revise Signed-off-by: yisaer --- statistics/histogram.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index cafeecd1ecd93..0cc0369122235 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -545,7 +545,7 @@ func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64 { // BetweenRowCount estimates the row count for interval [l, r). func (c *Column) BetweenRowCount(sctx sessionctx.Context, l, r types.Datum, lowEncoded, highEncoded []byte) float64 { histBetweenCnt := c.Histogram.BetweenRowCount(l, r) - if c.StatsVer <= Version1 { + if c.StatsVer <= Version1 || c.TopN == nil { return histBetweenCnt } return float64(c.TopN.BetweenCount(lowEncoded, highEncoded)) + histBetweenCnt @@ -1046,7 +1046,7 @@ func (e *ErrorRate) Merge(rate *ErrorRate) { type Column struct { Histogram *CMSketch - *TopN + TopN *TopN *FMSketch PhysicalID int64 Count int64 @@ -1075,17 +1075,17 @@ func (c *Column) String() string { // TotalRowCount returns the total count of this column. func (c *Column) TotalRowCount() float64 { - if c.StatsVer >= Version2 { - return c.Histogram.TotalRowCount() + float64(c.TopN.TotalCount()) + if c.StatsVer <= Version1 || c.TopN == nil { + return c.Histogram.TotalRowCount() } - return c.Histogram.TotalRowCount() + return c.Histogram.TotalRowCount() + float64(c.TopN.TotalCount()) } func (c *Column) notNullCount() float64 { - if c.StatsVer >= Version2 { - return c.Histogram.notNullCount() + float64(c.TopN.TotalCount()) + if c.StatsVer <= Version1 || c.TopN == nil { + return c.Histogram.notNullCount() } - return c.Histogram.notNullCount() + return c.Histogram.notNullCount() + float64(c.TopN.TotalCount()) } // GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified. @@ -1168,7 +1168,7 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded if c.Histogram.NDV > 0 && c.outOfRange(val) { return outOfRangeEQSelectivity(c.Histogram.NDV, realtimeRowCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil } - if c.CMSketch != nil { + if c.CMSketch != nil && c.TopN != nil { count, err := queryValue(sctx.GetSessionVars().StmtCtx, c.CMSketch, c.TopN, val) return float64(count), errors.Trace(err) } @@ -1183,7 +1183,7 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded } // 1. try to find this value in TopN if c.TopN != nil { - rowcount, ok := c.QueryTopN(encodedVal) + rowcount, ok := c.TopN.QueryTopN(encodedVal) if ok { return float64(rowcount), nil } From 3396f330f6f3be717ed6f6ca5d35b0e046a4d604 Mon Sep 17 00:00:00 2001 From: yisaer Date: Tue, 14 Jun 2022 11:44:18 +0800 Subject: [PATCH 2/2] revise Signed-off-by: yisaer --- statistics/cmsketch.go | 52 +++++++++++++++++++++++------------------ statistics/histogram.go | 20 ++++++++-------- 2 files changed, 39 insertions(+), 33 deletions(-) diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index c9cc1de380564..15308e3a84c7a 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -173,24 +173,6 @@ func (c *CMSketch) MemoryUsage() (sum int64) { return } -// queryAddTopN TopN adds count to CMSketch.topN if exists, and returns the count of such elements after insert. -// If such elements does not in topn elements, nothing will happen and false will be returned. -func (c *TopN) updateTopNWithDelta(d []byte, delta uint64, increase bool) bool { - if c == nil || c.TopN == nil { - return false - } - idx := c.findTopN(d) - if idx >= 0 { - if increase { - c.TopN[idx].Count += delta - } else { - c.TopN[idx].Count -= delta - } - return true - } - return false -} - // InsertBytes inserts the bytes value into the CM Sketch. func (c *CMSketch) InsertBytes(bytes []byte) { c.InsertBytesByCount(bytes, 1) @@ -480,11 +462,6 @@ func (c *CMSketch) Copy() *CMSketch { return &CMSketch{count: c.count, width: c.width, depth: c.depth, table: tbl, defaultValue: c.defaultValue} } -// AppendTopN appends a topn into the TopN struct. -func (c *TopN) AppendTopN(data []byte, count uint64) { - c.TopN = append(c.TopN, TopNMeta{data, count}) -} - // GetWidthAndDepth returns the width and depth of CM Sketch. func (c *CMSketch) GetWidthAndDepth() (int32, int32) { return c.width, c.depth @@ -501,6 +478,14 @@ type TopN struct { TopN []TopNMeta } +// AppendTopN appends a topn into the TopN struct. +func (c *TopN) AppendTopN(data []byte, count uint64) { + if c == nil { + return + } + c.TopN = append(c.TopN, TopNMeta{data, count}) +} + func (c *TopN) String() string { if c == nil { return "EmptyTopN" @@ -530,6 +515,9 @@ func (c *TopN) Num() int { // DecodedString returns the value with decoded result. func (c *TopN) DecodedString(ctx sessionctx.Context, colTypes []byte) (string, error) { + if c == nil { + return "", nil + } builder := &strings.Builder{} fmt.Fprintf(builder, "TopN{length: %v, ", len(c.TopN)) fmt.Fprint(builder, "[") @@ -699,6 +687,24 @@ func (c *TopN) MemoryUsage() (sum int64) { return } +// queryAddTopN TopN adds count to CMSketch.topN if exists, and returns the count of such elements after insert. +// If such elements does not in topn elements, nothing will happen and false will be returned. +func (c *TopN) updateTopNWithDelta(d []byte, delta uint64, increase bool) bool { + if c == nil || c.TopN == nil { + return false + } + idx := c.findTopN(d) + if idx >= 0 { + if increase { + c.TopN[idx].Count += delta + } else { + c.TopN[idx].Count -= delta + } + return true + } + return false +} + // NewTopN creates the new TopN struct by the given size. func NewTopN(n int) *TopN { return &TopN{TopN: make([]TopNMeta, 0, n)} diff --git a/statistics/histogram.go b/statistics/histogram.go index 0cc0369122235..cafeecd1ecd93 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -545,7 +545,7 @@ func (hg *Histogram) BetweenRowCount(a, b types.Datum) float64 { // BetweenRowCount estimates the row count for interval [l, r). func (c *Column) BetweenRowCount(sctx sessionctx.Context, l, r types.Datum, lowEncoded, highEncoded []byte) float64 { histBetweenCnt := c.Histogram.BetweenRowCount(l, r) - if c.StatsVer <= Version1 || c.TopN == nil { + if c.StatsVer <= Version1 { return histBetweenCnt } return float64(c.TopN.BetweenCount(lowEncoded, highEncoded)) + histBetweenCnt @@ -1046,7 +1046,7 @@ func (e *ErrorRate) Merge(rate *ErrorRate) { type Column struct { Histogram *CMSketch - TopN *TopN + *TopN *FMSketch PhysicalID int64 Count int64 @@ -1075,17 +1075,17 @@ func (c *Column) String() string { // TotalRowCount returns the total count of this column. func (c *Column) TotalRowCount() float64 { - if c.StatsVer <= Version1 || c.TopN == nil { - return c.Histogram.TotalRowCount() + if c.StatsVer >= Version2 { + return c.Histogram.TotalRowCount() + float64(c.TopN.TotalCount()) } - return c.Histogram.TotalRowCount() + float64(c.TopN.TotalCount()) + return c.Histogram.TotalRowCount() } func (c *Column) notNullCount() float64 { - if c.StatsVer <= Version1 || c.TopN == nil { - return c.Histogram.notNullCount() + if c.StatsVer >= Version2 { + return c.Histogram.notNullCount() + float64(c.TopN.TotalCount()) } - return c.Histogram.notNullCount() + float64(c.TopN.TotalCount()) + return c.Histogram.notNullCount() } // GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified. @@ -1168,7 +1168,7 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded if c.Histogram.NDV > 0 && c.outOfRange(val) { return outOfRangeEQSelectivity(c.Histogram.NDV, realtimeRowCount, int64(c.TotalRowCount())) * c.TotalRowCount(), nil } - if c.CMSketch != nil && c.TopN != nil { + if c.CMSketch != nil { count, err := queryValue(sctx.GetSessionVars().StmtCtx, c.CMSketch, c.TopN, val) return float64(count), errors.Trace(err) } @@ -1183,7 +1183,7 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded } // 1. try to find this value in TopN if c.TopN != nil { - rowcount, ok := c.TopN.QueryTopN(encodedVal) + rowcount, ok := c.QueryTopN(encodedVal) if ok { return float64(rowcount), nil }