diff --git a/executor/analyze.go b/executor/analyze.go index 1a9e6555e524d..de0176ed5e18c 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -702,7 +702,6 @@ func (e *AnalyzeFastExec) getNextSampleKey(bo *tikv.Backoffer, startKey kv.Key) func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error) { // Do get regions row count. bo := tikv.NewBackoffer(context.Background(), 500) - e.rowCount = 0 needRebuildForRoutine := make([]bool, e.concurrency) errs := make([]error, e.concurrency) sampTasksForRoutine := make([][]*AnalyzeFastTask, e.concurrency) @@ -734,6 +733,13 @@ func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error) { if err != nil { return false, err } + e.rowCount = 0 + for _, task := range e.sampTasks { + cnt := task.EndOffset - task.BeginOffset + task.BeginOffset = e.rowCount + task.EndOffset = e.rowCount + cnt + e.rowCount += cnt + } for { // Search for the region which contains the targetKey. loc, err := e.cache.LocateKey(bo, targetKey) @@ -949,7 +955,7 @@ func (e *AnalyzeFastExec) handleSampTasks(bo *tikv.Backoffer, workID int, err *e keys = append(keys, tablecodec.EncodeRowKeyWithHandle(tableID, randKey)) } - var kvMap map[string][]byte + kvMap := make(map[string][]byte, len(keys)) for _, key := range keys { var iter kv.Iterator iter, *err = snapshot.Iter(key, endKey) diff --git a/executor/analyze_test.go b/executor/analyze_test.go index 4bb818cbfa63a..038bfe35829ec 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -237,9 +237,6 @@ func (s *testSuite1) TestFastAnalyze(c *C) { tk.MustExec("create table t(a int primary key, b int, index index_b(b))") tk.MustExec("set @@session.tidb_enable_fast_analyze=1") tk.MustExec("set @@session.tidb_build_stats_concurrency=1") - for i := 0; i < 3000; i++ { - tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i)) - } tblInfo, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) c.Assert(err, IsNil) tid := tblInfo.Meta().ID @@ -248,6 +245,9 @@ func (s *testSuite1) TestFastAnalyze(c *C) { splitKeys := generateTableSplitKeyForInt(tid, []int{600, 1200, 1800, 2400}) manipulateCluster(cluster, splitKeys) + for i := 0; i < 3000; i++ { + tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i)) + } tk.MustExec("analyze table t with 5 buckets") is := executor.GetInfoSchema(tk.Se.(sessionctx.Context)) @@ -255,49 +255,25 @@ func (s *testSuite1) TestFastAnalyze(c *C) { c.Assert(err, IsNil) tableInfo := table.Meta() tbl := dom.StatsHandle().GetTableStats(tableInfo) - sTbl := fmt.Sprintln(tbl) - matched := false - if sTbl == "Table:39 Count:3000\n"+ + c.Assert(tbl.String(), Equals, "Table:39 Count:3000\n"+ "column:1 ndv:3000 totColSize:0\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+ + "num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+ + "num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+ + "num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+ + "num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+ + "num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+ "column:2 ndv:3000 totColSize:0\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+ + "num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+ + "num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+ + "num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+ + "num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+ + "num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+ "index:1 ndv:3000\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" || - sTbl == "Table:39 Count:3000\n"+ - "column:2 ndv:3000 totColSize:0\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+ - "column:1 ndv:3000 totColSize:0\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+ - "index:1 ndv:3000\n"+ - "num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+ - "num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+ - "num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+ - "num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+ - "num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" { - matched = true - } - c.Assert(matched, Equals, true) + "num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+ + "num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+ + "num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+ + "num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+ + "num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1") } func (s *testSuite1) TestAnalyzeIncremental(c *C) { @@ -415,7 +391,7 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) { tk := testkit.NewTestKit(c, s.store) tk.MustExec("use test") tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a int primary key, b int, index index_b(b))") + tk.MustExec("create table t(a int primary key)") tk.MustExec("set @@session.tidb_enable_fast_analyze=1") tk.MustExec("set @@session.tidb_build_stats_concurrency=1") tblInfo, err := s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t")) @@ -425,12 +401,14 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) { splitKeys := generateTableSplitKeyForInt(tid, []int{6, 12, 18, 24, 30}) regionIDs := manipulateCluster(s.cluster, splitKeys) for i := 0; i < 30; i++ { - tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i)) + tk.MustExec(fmt.Sprintf("insert into t values (%d)", i)) } s.cli.setFailRegion(regionIDs[4]) tk.MustExec("analyze table t") // 4 regions will be sampled, and it will retry the last failed region. c.Assert(s.cli.mu.count, Equals, int64(5)) + row := tk.MustQuery(`show stats_meta where db_name = "test" and table_name = "t"`).Rows()[0] + c.Assert(row[5], Equals, "30") } func (s *testSuite1) TestFailedAnalyzeRequest(c *C) { diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 747337c1d50ee..727049675b960 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -145,7 +145,7 @@ func (c *CMSketch) calculateDefaultVal(helper *topNHelper, estimateNDV, scaleRat c.defaultValue = 1 } else { estimateRemainingCount := rowCount - (helper.sampleSize-uint64(helper.onlyOnceItems))*scaleRatio - c.defaultValue = estimateRemainingCount / (estimateNDV - uint64(sampleNDV) + helper.onlyOnceItems) + c.defaultValue = estimateRemainingCount / mathutil.MaxUint64(1, estimateNDV-uint64(sampleNDV)+helper.onlyOnceItems) } } diff --git a/statistics/table.go b/statistics/table.go index 9fc6964020fe3..c95979e81dbde 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -16,6 +16,7 @@ package statistics import ( "fmt" "math" + "sort" "strings" "sync" @@ -98,12 +99,22 @@ func (t *Table) Copy() *Table { func (t *Table) String() string { strs := make([]string, 0, len(t.Columns)+1) strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.PhysicalID, t.Count)) + cols := make([]*Column, 0, len(t.Columns)) for _, col := range t.Columns { - strs = append(strs, col.String()) + cols = append(cols, col) } - for _, col := range t.Indices { + sort.Slice(cols, func(i, j int) bool { return cols[i].ID < cols[j].ID }) + for _, col := range cols { strs = append(strs, col.String()) } + idxs := make([]*Index, 0, len(t.Indices)) + for _, idx := range t.Indices { + idxs = append(idxs, idx) + } + sort.Slice(idxs, func(i, j int) bool { return idxs[i].ID < idxs[j].ID }) + for _, idx := range idxs { + strs = append(strs, idx.String()) + } return strings.Join(strs, "\n") } diff --git a/store/mockstore/mocktikv/rpc.go b/store/mockstore/mocktikv/rpc.go index 3cf3aa3e062db..23d65f7267a85 100644 --- a/store/mockstore/mocktikv/rpc.go +++ b/store/mockstore/mocktikv/rpc.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "io" + "math" "strconv" "time" @@ -874,8 +875,8 @@ func (c *RPCClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.R // DebugGetRegionProperties is for fast analyze in mock tikv. case tikvrpc.CmdDebugGetRegionProperties: r := req.DebugGetRegionProperties - region, _ := c.Cluster.GetRegionByID(r.RegionId) - scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: region.StartKey, EndKey: region.EndKey}) + region, _ := c.Cluster.GetRegion(r.RegionId) + scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: MvccKey(region.StartKey).Raw(), EndKey: MvccKey(region.EndKey).Raw(), Version: math.MaxUint64, Limit: math.MaxUint32}) resp.DebugGetRegionProperties = &debugpb.GetRegionPropertiesResponse{ Props: []*debugpb.Property{{ Name: "mvcc.num_rows",