Skip to content

Commit

Permalink
Merge branch 'master' into fix_per_reg
Browse files Browse the repository at this point in the history
  • Loading branch information
rebelice authored Mar 12, 2021
2 parents efee6d5 + 1e2f1c0 commit 6ec703a
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 98 deletions.
8 changes: 7 additions & 1 deletion executor/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,14 +506,20 @@ func (s *testFastAnalyze) TestFastAnalyze(c *C) {
c.Assert(result.Rows()[1][5], Equals, "2")
c.Assert(result.Rows()[2][5], Equals, "3")
*/
}

// TestFastAnalyze4GlobalStats checks that `analyze table` returns an error when
// fast analyze is enabled together with analyze version 2 and dynamic
// partition prune mode.
func (s *testSerialSuite2) TestFastAnalyze4GlobalStats(c *C) {
	tk := testkit.NewTestKit(c, s.store)
	tk.MustExec("use test")
	tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
	tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
	// test fast analyze in dynamic mode
	tk.MustExec("set @@session.tidb_analyze_version = 2;")
	tk.MustExec("set @@session.tidb_partition_prune_mode = 'dynamic';")
	tk.MustExec("drop table if exists t4;")
	tk.MustExec("create table t4(a int, b int) PARTITION BY HASH(a) PARTITIONS 2;")
	tk.MustExec("insert into t4 values(1,1),(3,3),(4,4),(2,2),(5,5);")
	// err is declared here; it does not exist earlier in this function.
	err := tk.ExecToErr("analyze table t4;")
	// Fail with a clear assertion instead of a nil-pointer panic on err.Error()
	// if the statement unexpectedly succeeds.
	c.Assert(err != nil, Equals, true)
	c.Assert(err.Error(), Equals, "Fast analyze hasn't reached General Availability and only support analyze version 1 currently.")
}

Expand Down
87 changes: 0 additions & 87 deletions statistics/cmsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"fmt"
"math"
"math/rand"
"strconv"
"time"

. "github.com/pingcap/check"
Expand Down Expand Up @@ -304,89 +303,3 @@ func (s *testStatisticsSuite) TestCMSketchCodingTopN(c *C) {
// do not panic
DecodeCMSketchAndTopN([]byte{}, rows)
}

// TestMergeTopN builds several TopN structures filled with random values and
// counts, merges them with MergeTopN, and checks that every entry of the merged
// TopN and of the remaining list carries the summed count of its value.
func (s *testStatisticsSuite) TestMergeTopN(c *C) {
	tests := []struct {
		topnNum    int // number of TopN structures to merge
		n          int // entries per TopN, also the size requested from MergeTopN
		maxTopNVal int // values are drawn from [0, maxTopNVal)
		maxTopNCnt int // counts are drawn from [0, maxTopNCnt)
	}{
		{
			topnNum:    10,
			n:          5,
			maxTopNVal: 50,
			maxTopNCnt: 100,
		},
		{
			topnNum:    1,
			n:          5,
			maxTopNVal: 50,
			maxTopNCnt: 100,
		},
		{
			topnNum:    5,
			n:          5,
			maxTopNVal: 5,
			maxTopNCnt: 100,
		},
		{
			topnNum:    5,
			n:          5,
			maxTopNVal: 10,
			maxTopNCnt: 100,
		},
	}
	for _, t := range tests {
		topnNum, n := t.topnNum, t.n
		maxTopNVal, maxTopNCnt := t.maxTopNVal, t.maxTopNCnt

		// maxTopNVal must be at least n, otherwise n distinct values cannot be
		// drawn and the de-duplication loop below would never terminate.
		ok := maxTopNVal >= n
		c.Assert(ok, Equals, true)

		topNs := make([]*TopN, 0, topnNum)
		// res maps each generated value to the total count expected after merging.
		res := make(map[int]uint64)
		rand.Seed(time.Now().Unix())
		for i := 0; i < topnNum; i++ {
			topN := NewTopN(n)
			occur := make(map[int]bool)
			for j := 0; j < n; j++ {
				// The range of numbers in the topn structure is in [0, maxTopNVal)
				// But there cannot be repeated occurrences of value in a topN structure.
				randNum := rand.Intn(maxTopNVal)
				for occur[randNum] {
					randNum = rand.Intn(maxTopNVal)
				}
				occur[randNum] = true
				tString := []byte(fmt.Sprintf("%d", randNum))
				// The range of the number of occurrences in the topn structure is in [0, maxTopNCnt)
				randCnt := uint64(rand.Intn(maxTopNCnt))
				res[randNum] += randCnt
				topNMeta := TopNMeta{Encoded: tString, Count: randCnt}
				topN.TopN = append(topN.TopN, topNMeta)
			}
			topNs = append(topNs, topN)
		}
		topN, remainTopN := MergeTopN(topNs, uint32(n))
		cnt := len(topN.TopN)
		// minTopNCnt ends up as the count of the last merged entry.
		// NOTE(review): presumably MergeTopN returns entries in descending count
		// order, making the last one the minimum — confirm against MergeTopN.
		var minTopNCnt uint64
		for _, topNMeta := range topN.TopN {
			val, err := strconv.Atoi(string(topNMeta.Encoded))
			c.Assert(err, IsNil)
			c.Assert(topNMeta.Count, Equals, res[val])
			minTopNCnt = topNMeta.Count
		}
		if remainTopN != nil {
			cnt += len(remainTopN)
			for _, remainTopNMeta := range remainTopN {
				val, err := strconv.Atoi(string(remainTopNMeta.Encoded))
				c.Assert(err, IsNil)
				c.Assert(remainTopNMeta.Count, Equals, res[val])
				// Counts are random sums, so a remaining value may tie with the
				// smallest merged count; a strict comparison would be flaky.
				ok = minTopNCnt >= remainTopNMeta.Count
				c.Assert(ok, Equals, true)
			}
		}
		// Every distinct generated value must appear exactly once in the output.
		c.Assert(cnt, Equals, len(res))
	}
}
8 changes: 4 additions & 4 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,9 @@ func (h *Handle) MergePartitionStats2GlobalStats(sc sessionctx.Context, opts map
return
}
globalTableInfo := globalTable.Meta()
partitionNum := globalTableInfo.Partition.Num
partitionNum := len(globalTableInfo.Partition.Definitions)
partitionIDs := make([]int64, 0, partitionNum)
for i := uint64(0); i < partitionNum; i++ {
for i := 0; i < partitionNum; i++ {
partitionIDs = append(partitionIDs, globalTableInfo.Partition.Definitions[i].ID)
}

Expand Down Expand Up @@ -420,7 +420,7 @@ func (h *Handle) MergePartitionStats2GlobalStats(sc sessionctx.Context, opts map
for i := 0; i < globalStats.Num; i++ {
// Merge CMSketch
globalStats.Cms[i] = allCms[i][0].Copy()
for j := uint64(1); j < partitionNum; j++ {
for j := 1; j < partitionNum; j++ {
err = globalStats.Cms[i].MergeCMSketch(allCms[i][j])
if err != nil {
return
Expand All @@ -444,7 +444,7 @@ func (h *Handle) MergePartitionStats2GlobalStats(sc sessionctx.Context, opts map
// For the column stats, we should merge the FMSketch first. And use the FMSketch to calculate the new NDV.
// merge FMSketch
globalStats.Fms[i] = allFms[i][0].Copy()
for j := uint64(1); j < partitionNum; j++ {
for j := 1; j < partitionNum; j++ {
globalStats.Fms[i].MergeFMSketch(allFms[i][j])
}

Expand Down
44 changes: 38 additions & 6 deletions statistics/handle/handle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1504,30 +1504,62 @@ partition by range (a) (
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)

tk.MustExec("set @@tidb_partition_prune_mode='static'")
tk.MustExec("set @@tidb_analyze_version=1")
tk.MustExec("set @@session.tidb_analyze_version=1")
tk.MustExec("analyze table t") // both p0 and p1 are in ver1
c.Assert(len(tk.MustQuery("show stats_meta").Rows()), Equals, 2)

tk.MustExec("set @@tidb_partition_prune_mode='dynamic'")
tk.MustExec("set @@tidb_analyze_version=1")
tk.MustExec("set @@session.tidb_analyze_version=1")
err := tk.ExecToErr("analyze table t") // try to build global-stats on ver1
c.Assert(err, NotNil)
c.Assert(err.Error(), Equals, "[stats]: some partition level statistics are not in statistics version 2, please set tidb_analyze_version to 2 and analyze the this table")

tk.MustExec("set @@tidb_partition_prune_mode='dynamic'")
tk.MustExec("set @@tidb_analyze_version=2")
tk.MustExec("set @@session.tidb_analyze_version=2")
err = tk.ExecToErr("analyze table t partition p1") // only analyze p1 to let it in ver2 while p0 is in ver1
c.Assert(err, NotNil)
c.Assert(err.Error(), Equals, "[stats]: some partition level statistics are not in statistics version 2, please set tidb_analyze_version to 2 and analyze the this table")

tk.MustExec("analyze table t") // both p0 and p1 are in ver2
c.Assert(len(tk.MustQuery("show stats_meta").Rows()), Equals, 3)

// If we already have global-stats, we can get the latest global-stats by analyzing the newly added partition.
tk.MustExec("alter table t add partition (partition p2 values less than (30))")
tk.MustExec("insert t values (13), (14)")
tk.MustExec("insert t values (13), (14), (22), (23)")
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table t partition p2") // it will success since p0 and p1 are both in ver2
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
do := s.do
is := do.InfoSchema()
h := do.StatsHandle()
c.Assert(h.Update(is), IsNil)
tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo := tbl.Meta()
globalStats := h.GetTableStats(tableInfo)
// global.count = p0.count(3) + p1.count(2) + p2.count(2)
// We did not analyze partition p1, so the value here has not changed
c.Assert(globalStats.Count, Equals, int64(7))

tk.MustExec("analyze table t partition p1;")
globalStats = h.GetTableStats(tableInfo)
// global.count = p0.count(3) + p1.count(4) + p2.count(2)
// The value of p1.Count is correct now.
c.Assert(globalStats.Count, Equals, int64(9))
c.Assert(globalStats.ModifyCount, Equals, int64(0))

tk.MustExec("alter table t drop partition p2;")
c.Assert(s.do.StatsHandle().DumpStatsDeltaToKV(handle.DumpAll), IsNil)
tk.MustExec("analyze table t partition p2") // it will success since p0 and p1 are both in ver2
c.Assert(len(tk.MustQuery("show stats_meta").Rows()), Equals, 4) // p0, p1, p2 and global
globalStats = h.GetTableStats(tableInfo)
// The value of global.count will be updated the next time the table is analyzed.
c.Assert(globalStats.Count, Equals, int64(9))
c.Assert(globalStats.ModifyCount, Equals, int64(0))

tk.MustExec("analyze table t;")
globalStats = h.GetTableStats(tableInfo)
// global.count = p0.count(3) + p1.count(4)
// The value of global.Count is correct now.
c.Assert(globalStats.Count, Equals, int64(7))
}

func (s *testStatsSuite) TestExtendedStatsDefaultSwitch(c *C) {
Expand Down
88 changes: 88 additions & 0 deletions statistics/handle/update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package handle_test
import (
"fmt"
"math"
"math/rand"
"os"
"strconv"
"strings"
Expand Down Expand Up @@ -2082,6 +2083,93 @@ func (s *testStatsSuite) TestFeedbackCounter(c *C) {
c.Assert(subtraction(newNum, oldNum), Equals, 20)
}

// TestMergeTopN builds several TopN structures filled with random values and
// counts, merges them with statistics.MergeTopN, and checks that every entry of
// the merged TopN and of the remaining list carries the summed count of its value.
func (s *testSerialStatsSuite) TestMergeTopN(c *C) {
	// Move this test to here to avoid race test.
	tests := []struct {
		topnNum    int // number of TopN structures to merge
		n          int // entries per TopN, also the size requested from MergeTopN
		maxTopNVal int // values are drawn from [0, maxTopNVal)
		maxTopNCnt int // counts are drawn from [0, maxTopNCnt)
	}{
		{
			topnNum:    10,
			n:          5,
			maxTopNVal: 50,
			maxTopNCnt: 100,
		},
		{
			topnNum:    1,
			n:          5,
			maxTopNVal: 50,
			maxTopNCnt: 100,
		},
		{
			topnNum:    5,
			n:          5,
			maxTopNVal: 5,
			maxTopNCnt: 100,
		},
		{
			topnNum:    5,
			n:          5,
			maxTopNVal: 10,
			maxTopNCnt: 100,
		},
	}
	for _, t := range tests {
		topnNum, n := t.topnNum, t.n
		maxTopNVal, maxTopNCnt := t.maxTopNVal, t.maxTopNCnt

		// maxTopNVal must be at least n, otherwise n distinct values cannot be
		// drawn and the de-duplication loop below would never terminate.
		ok := maxTopNVal >= n
		c.Assert(ok, Equals, true)

		topNs := make([]*statistics.TopN, 0, topnNum)
		// res maps each generated value to the total count expected after merging.
		res := make(map[int]uint64)
		rand.Seed(time.Now().Unix())
		for i := 0; i < topnNum; i++ {
			topN := statistics.NewTopN(n)
			occur := make(map[int]bool)
			for j := 0; j < n; j++ {
				// The range of numbers in the topn structure is in [0, maxTopNVal)
				// But there cannot be repeated occurrences of value in a topN structure.
				randNum := rand.Intn(maxTopNVal)
				for occur[randNum] {
					randNum = rand.Intn(maxTopNVal)
				}
				occur[randNum] = true
				tString := []byte(fmt.Sprintf("%d", randNum))
				// The range of the number of occurrences in the topn structure is in [0, maxTopNCnt)
				randCnt := uint64(rand.Intn(maxTopNCnt))
				res[randNum] += randCnt
				topNMeta := statistics.TopNMeta{Encoded: tString, Count: randCnt}
				topN.TopN = append(topN.TopN, topNMeta)
			}
			topNs = append(topNs, topN)
		}
		topN, remainTopN := statistics.MergeTopN(topNs, uint32(n))
		cnt := len(topN.TopN)
		// minTopNCnt ends up as the count of the last merged entry.
		// NOTE(review): presumably MergeTopN returns entries in descending count
		// order, making the last one the minimum — confirm against MergeTopN.
		var minTopNCnt uint64
		for _, topNMeta := range topN.TopN {
			val, err := strconv.Atoi(string(topNMeta.Encoded))
			c.Assert(err, IsNil)
			c.Assert(topNMeta.Count, Equals, res[val])
			minTopNCnt = topNMeta.Count
		}
		if remainTopN != nil {
			cnt += len(remainTopN)
			for _, remainTopNMeta := range remainTopN {
				val, err := strconv.Atoi(string(remainTopNMeta.Encoded))
				c.Assert(err, IsNil)
				c.Assert(remainTopNMeta.Count, Equals, res[val])
				// Counts are random sums, so a remaining value may tie with the
				// smallest merged count; a strict comparison would be flaky.
				ok = minTopNCnt >= remainTopNMeta.Count
				c.Assert(ok, Equals, true)
			}
		}
		// Every distinct generated value must appear exactly once in the output.
		c.Assert(cnt, Equals, len(res))
	}
}

func (s *testSerialStatsSuite) TestAutoUpdatePartitionInDynamicOnlyMode(c *C) {
defer cleanEnv(c, s.store, s.do)
testKit := testkit.NewTestKit(c, s.store)
Expand Down

0 comments on commit 6ec703a

Please sign in to comment.