Skip to content

Commit

Permalink
statistics: using standard lib slices instead of exp slices (#45996)
Browse files Browse the repository at this point in the history
ref #45933
  • Loading branch information
fatelei authored Aug 14, 2023
1 parent c34f6fc commit 60588b9
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 39 deletions.
1 change: 0 additions & 1 deletion statistics/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ go_test(
"@com_github_pingcap_failpoint//:failpoint",
"@com_github_pingcap_log//:log",
"@com_github_stretchr_testify//require",
"@org_golang_x_exp//slices",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_zap//:zap",
],
Expand Down
47 changes: 24 additions & 23 deletions statistics/cmsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ package statistics

import (
"bytes"
"cmp"
"fmt"
"math"
"reflect"
"slices"
"sort"
"strings"
"sync/atomic"
Expand All @@ -39,7 +41,6 @@ import (
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tipb/go-tipb"
"github.com/twmb/murmur3"
"golang.org/x/exp/slices"
)

// topNThreshold is the minimum ratio of the number of topN elements in CMSketch, 10 means 1 / 10 = 10%.
Expand Down Expand Up @@ -265,12 +266,12 @@ func queryValue(sctx sessionctx.Context, c *CMSketch, t *TopN, val types.Datum)
if sctx != nil {
sc = sctx.GetSessionVars().StmtCtx
}
bytes, err := tablecodec.EncodeValue(sc, nil, val)
rawData, err := tablecodec.EncodeValue(sc, nil, val)
if err != nil {
return 0, errors.Trace(err)
}
h1, h2 := murmur3.Sum128(bytes)
if ret, ok := t.QueryTopN(sctx, bytes); ok {
h1, h2 := murmur3.Sum128(rawData)
if ret, ok := t.QueryTopN(sctx, rawData); ok {
return ret, nil
}
return c.queryHashValue(sctx, h1, h2), nil
Expand All @@ -289,7 +290,7 @@ func (c *CMSketch) QueryBytes(d []byte) uint64 {
func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (result uint64) {
vals := make([]uint32, c.depth)
originVals := make([]uint32, c.depth)
min := uint32(math.MaxUint32)
minValue := uint32(math.MaxUint32)
useDefaultValue := false
if sctx != nil && sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
Expand All @@ -309,8 +310,8 @@ func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (resul
for i := range c.table {
j := (h1 + h2*uint64(i)) % uint64(c.width)
originVals[i] = c.table[i][j]
if min > c.table[i][j] {
min = c.table[i][j]
if minValue > c.table[i][j] {
minValue = c.table[i][j]
}
noise := (c.count - uint64(c.table[i][j])) / (uint64(c.width) - 1)
if uint64(c.table[i][j]) == 0 {
Expand All @@ -323,8 +324,8 @@ func (c *CMSketch) queryHashValue(sctx sessionctx.Context, h1, h2 uint64) (resul
}
slices.Sort(vals)
res := vals[(c.depth-1)/2] + (vals[c.depth/2]-vals[(c.depth-1)/2])/2
if res > min+temp {
res = min + temp
if res > minValue+temp {
res = minValue + temp
}
if res == 0 {
return uint64(0)
Expand Down Expand Up @@ -641,11 +642,11 @@ func (c *TopN) findTopN(d []byte) int {
}
match := false
idx := sort.Search(len(c.TopN), func(i int) bool {
cmp := bytes.Compare(c.TopN[i].Encoded, d)
if cmp == 0 {
cmpRst := bytes.Compare(c.TopN[i].Encoded, d)
if cmpRst == 0 {
match = true
}
return cmp >= 0
return cmpRst >= 0
})
if !match {
return -1
Expand All @@ -660,11 +661,11 @@ func (c *TopN) LowerBound(d []byte) (idx int, match bool) {
return 0, false
}
idx = sort.Search(len(c.TopN), func(i int) bool {
cmp := bytes.Compare(c.TopN[i].Encoded, d)
if cmp == 0 {
cmpRst := bytes.Compare(c.TopN[i].Encoded, d)
if cmpRst == 0 {
match = true
}
return cmp >= 0
return cmpRst >= 0
})
return idx, match
}
Expand Down Expand Up @@ -699,8 +700,8 @@ func (c *TopN) Sort() {
if c == nil {
return
}
slices.SortFunc(c.TopN, func(i, j TopNMeta) bool {
return bytes.Compare(i.Encoded, j.Encoded) < 0
slices.SortFunc(c.TopN, func(i, j TopNMeta) int {
return bytes.Compare(i.Encoded, j.Encoded)
})
}

Expand Down Expand Up @@ -919,11 +920,11 @@ func checkEmptyTopNs(topNs []*TopN) bool {

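// SortTopnMeta sorts topnMetas in place by Count in descending order, breaking ties by Encoded in ascending order, and returns the same slice.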
func SortTopnMeta(topnMetas []TopNMeta) []TopNMeta {
slices.SortFunc(topnMetas, func(i, j TopNMeta) bool {
slices.SortFunc(topnMetas, func(i, j TopNMeta) int {
if i.Count != j.Count {
return i.Count > j.Count
return cmp.Compare(j.Count, i.Count)
}
return bytes.Compare(i.Encoded, j.Encoded) < 0
return bytes.Compare(i.Encoded, j.Encoded)
})
return topnMetas
}
Expand All @@ -934,11 +935,11 @@ func GetMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMet
}

func getMergedTopNFromSortedSlice(sorted []TopNMeta, n uint32) (*TopN, []TopNMeta) {
slices.SortFunc(sorted, func(i, j TopNMeta) bool {
slices.SortFunc(sorted, func(i, j TopNMeta) int {
if i.Count != j.Count {
return i.Count > j.Count
return cmp.Compare(j.Count, i.Count)
}
return bytes.Compare(i.Encoded, j.Encoded) < 0
return bytes.Compare(i.Encoded, j.Encoded)
})
n = mathutil.Min(uint32(len(sorted)), n)

Expand Down
12 changes: 12 additions & 0 deletions statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,18 @@ func TestMergePartTopN2GlobalTopNWithoutHists(t *testing.T) {
require.Len(t, leftTopN, 1, "should have 1 left topN")
}

// TestSortTopnMeta verifies that SortTopnMeta puts the entry with the
// larger Count first: given counts 1 and 2, the first element after
// sorting must have Count 2.
func TestSortTopnMeta(t *testing.T) {
	metas := []TopNMeta{
		{Encoded: []byte("a"), Count: 1},
		{Encoded: []byte("b"), Count: 2},
	}

	got := SortTopnMeta(metas)

	// Guards the bool -> int comparator conversion: a swapped comparison
	// would leave the Count-1 entry at the front.
	require.Equal(t, uint64(2), got[0].Count)
}

func TestMergePartTopN2GlobalTopNWithHists(t *testing.T) {
loc := time.UTC
sc := &stmtctx.StatementContext{TimeZone: loc}
Expand Down
8 changes: 4 additions & 4 deletions statistics/feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"encoding/gob"
"math"
"math/rand"
"slices"
"sort"
goatomic "sync/atomic"
"time"
Expand All @@ -41,7 +42,6 @@ import (
"github.com/pingcap/tidb/util/ranger"
"go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

// Feedback represents the total scan count in range [lower, upper).
Expand Down Expand Up @@ -353,19 +353,19 @@ func NonOverlappedFeedbacks(sc *stmtctx.StatementContext, fbs []Feedback) ([]Fee
// Sort feedbacks in ascending order by upper bound and then by lower bound, then pick every feedback that
// does not overlap with the previously chosen feedbacks.
var existsErr bool
slices.SortFunc(fbs, func(i, j Feedback) bool {
slices.SortFunc(fbs, func(i, j Feedback) int {
res, err := i.Upper.Compare(sc, j.Upper, collate.GetBinaryCollator())
if err != nil {
existsErr = true
}
if existsErr || res != 0 {
return res < 0
return res
}
res, err = i.Lower.Compare(sc, j.Lower, collate.GetBinaryCollator())
if err != nil {
existsErr = true
}
return res < 0
return res
})
if existsErr {
return fbs, false
Expand Down
1 change: 0 additions & 1 deletion statistics/handle/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ go_library(
"@com_github_pingcap_log//:log",
"@com_github_pingcap_tipb//go-tipb",
"@com_github_tikv_client_go_v2//oracle",
"@org_golang_x_exp//slices",
"@org_uber_go_atomic//:atomic",
"@org_uber_go_zap//:zap",
],
Expand Down
7 changes: 3 additions & 4 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"context"
"encoding/json"
"fmt"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -50,7 +51,6 @@ import (
"github.com/tikv/client-go/v2/oracle"
atomic2 "go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

const (
Expand Down Expand Up @@ -976,9 +976,8 @@ func MergeGlobalStatsTopNByConcurrency(mergeConcurrency, mergeBatchSize int, wra
// Remove the value from the Hists.
if len(removeTopn) > 0 {
tmp := removeTopn
slices.SortFunc(tmp, func(i, j statistics.TopNMeta) bool {
cmpResult := bytes.Compare(i.Encoded, j.Encoded)
return cmpResult < 0
slices.SortFunc(tmp, func(i, j statistics.TopNMeta) int {
return bytes.Compare(i.Encoded, j.Encoded)
})
wrapper.AllHg[i].RemoveVals(tmp)
}
Expand Down
9 changes: 5 additions & 4 deletions statistics/handle/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ package handle

import (
"bytes"
"cmp"
"context"
"fmt"
"math"
"math/rand"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -48,7 +50,6 @@ import (
"github.com/pingcap/tidb/util/timeutil"
"go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
)

type tableDeltaMap map[int64]variable.TableDelta
Expand Down Expand Up @@ -961,11 +962,11 @@ func (h *Handle) DumpColStatsUsageToKV() error {
for id, t := range colMap {
pairs = append(pairs, pair{tblColID: id, lastUsedAt: t.UTC().Format(types.TimeFormat)})
}
slices.SortFunc(pairs, func(i, j pair) bool {
slices.SortFunc(pairs, func(i, j pair) int {
if i.tblColID.TableID == j.tblColID.TableID {
return i.tblColID.ID < j.tblColID.ID
return cmp.Compare(i.tblColID.ID, j.tblColID.ID)
}
return i.tblColID.TableID < j.tblColID.TableID
return cmp.Compare(i.tblColID.TableID, j.tblColID.TableID)
})
// Use batch insert to reduce cost.
for i := 0; i < len(pairs); i += batchInsertSize {
Expand Down
2 changes: 1 addition & 1 deletion statistics/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package statistics
import (
"bytes"
"math"
"slices"
"strings"

"github.com/pingcap/failpoint"
Expand All @@ -33,7 +34,6 @@ import (
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/ranger"
"github.com/twmb/murmur3"
"golang.org/x/exp/slices"
)

// Index represents an index histogram.
Expand Down
2 changes: 1 addition & 1 deletion statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"regexp"
"runtime/pprof"
"slices"
"testing"
"time"

Expand All @@ -44,7 +45,6 @@ import (
"github.com/pingcap/tidb/util/mock"
"github.com/pingcap/tidb/util/ranger"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
)

func TestCollationColumnEstimate(t *testing.T) {
Expand Down

0 comments on commit 60588b9

Please sign in to comment.