executor, stats: fix fast analyze bugs (#10680) (#10691)
alivxxx authored and zz-jason committed Jun 4, 2019
1 parent eef187b commit 3daeff5
Showing 5 changed files with 48 additions and 52 deletions.
10 changes: 8 additions & 2 deletions executor/analyze.go
@@ -702,7 +702,6 @@ func (e *AnalyzeFastExec) getNextSampleKey(bo *tikv.Backoffer, startKey kv.Key)
func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error) {
// Do get regions row count.
bo := tikv.NewBackoffer(context.Background(), 500)
e.rowCount = 0
needRebuildForRoutine := make([]bool, e.concurrency)
errs := make([]error, e.concurrency)
sampTasksForRoutine := make([][]*AnalyzeFastTask, e.concurrency)
@@ -734,6 +733,13 @@ func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error)
if err != nil {
return false, err
}
e.rowCount = 0
for _, task := range e.sampTasks {
cnt := task.EndOffset - task.BeginOffset
task.BeginOffset = e.rowCount
task.EndOffset = e.rowCount + cnt
e.rowCount += cnt
}
for {
// Search for the region which contains the targetKey.
loc, err := e.cache.LocateKey(bo, targetKey)
@@ -949,7 +955,7 @@ func (e *AnalyzeFastExec) handleSampTasks(bo *tikv.Backoffer, workID int, err *e
keys = append(keys, tablecodec.EncodeRowKeyWithHandle(tableID, randKey))
}

var kvMap map[string][]byte
kvMap := make(map[string][]byte, len(keys))
for _, key := range keys {
var iter kv.Iterator
iter, *err = snapshot.Iter(key, endKey)
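In executor/analyze.go, e.rowCount is no longer zeroed at the top of buildSampTask; it is reset only after the already-collected sample tasks have been renumbered into one contiguous offset range, which keeps the retained tasks' offsets and the running row count consistent when the task list has to be rebuilt. The last hunk also replaces a nil map declaration with make, since writing to a nil map panics. The sketch below restates the renormalization as a standalone helper; the task type and helper name are hypothetical, only the offset arithmetic comes from the patch.

```go
package main

import "fmt"

// task is a stand-in for AnalyzeFastTask; it keeps only the sampled row range.
type task struct{ BeginOffset, EndOffset uint64 }

// renumberTasks rewrites retained tasks so their offsets form one contiguous
// range starting at zero, and returns the total number of rows they cover.
// It mirrors the loop the patch adds to buildSampTask before further regions
// are scanned.
func renumberTasks(tasks []*task) (rowCount uint64) {
	for _, t := range tasks {
		cnt := t.EndOffset - t.BeginOffset
		t.BeginOffset = rowCount
		t.EndOffset = rowCount + cnt
		rowCount += cnt
	}
	return rowCount
}

func main() {
	tasks := []*task{{BeginOffset: 100, EndOffset: 160}, {BeginOffset: 400, EndOffset: 430}}
	fmt.Println(renumberTasks(tasks), *tasks[0], *tasks[1]) // 90 {0 60} {60 90}
}
```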
68 changes: 23 additions & 45 deletions executor/analyze_test.go
@@ -237,9 +237,6 @@ func (s *testSuite1) TestFastAnalyze(c *C) {
tk.MustExec("create table t(a int primary key, b int, index index_b(b))")
tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
for i := 0; i < 3000; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
}
tblInfo, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tid := tblInfo.Meta().ID
@@ -248,56 +245,35 @@ func (s *testSuite1) TestFastAnalyze(c *C) {
splitKeys := generateTableSplitKeyForInt(tid, []int{600, 1200, 1800, 2400})
manipulateCluster(cluster, splitKeys)

for i := 0; i < 3000; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
}
tk.MustExec("analyze table t with 5 buckets")

is := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo := table.Meta()
tbl := dom.StatsHandle().GetTableStats(tableInfo)
sTbl := fmt.Sprintln(tbl)
matched := false
if sTbl == "Table:39 Count:3000\n"+
c.Assert(tbl.String(), Equals, "Table:39 Count:3000\n"+
"column:1 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+
"column:2 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+
"index:1 ndv:3000\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" ||
sTbl == "Table:39 Count:3000\n"+
"column:2 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"column:1 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"index:1 ndv:3000\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" {
matched = true
}
c.Assert(matched, Equals, true)
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1")
}

func (s *testSuite1) TestAnalyzeIncremental(c *C) {
@@ -415,7 +391,7 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int primary key, b int, index index_b(b))")
tk.MustExec("create table t(a int primary key)")
tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
tblInfo, err := s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
@@ -425,12 +401,14 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) {
splitKeys := generateTableSplitKeyForInt(tid, []int{6, 12, 18, 24, 30})
regionIDs := manipulateCluster(s.cluster, splitKeys)
for i := 0; i < 30; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
tk.MustExec(fmt.Sprintf("insert into t values (%d)", i))
}
s.cli.setFailRegion(regionIDs[4])
tk.MustExec("analyze table t")
// 4 regions will be sampled, and it will retry the last failed region.
c.Assert(s.cli.mu.count, Equals, int64(5))
row := tk.MustQuery(`show stats_meta where db_name = "test" and table_name = "t"`).Rows()[0]
c.Assert(row[5], Equals, "30")
}

func (s *testSuite1) TestFailedAnalyzeRequest(c *C) {
2 changes: 1 addition & 1 deletion statistics/cmsketch.go
@@ -145,7 +145,7 @@ func (c *CMSketch) calculateDefaultVal(helper *topNHelper, estimateNDV, scaleRat
c.defaultValue = 1
} else {
estimateRemainingCount := rowCount - (helper.sampleSize-uint64(helper.onlyOnceItems))*scaleRatio
c.defaultValue = estimateRemainingCount / (estimateNDV - uint64(sampleNDV) + helper.onlyOnceItems)
c.defaultValue = estimateRemainingCount / mathutil.MaxUint64(1, estimateNDV-uint64(sampleNDV)+helper.onlyOnceItems)
}
}

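In calculateDefaultVal, the divisor estimateNDV - uint64(sampleNDV) + helper.onlyOnceItems is an unsigned expression that can evaluate to zero for some sample distributions, and integer division by zero panics at runtime; the patch clamps it to at least 1 with mathutil.MaxUint64. Below is a minimal sketch of the guard; the function and parameter names are illustrative stand-ins for the fields used in the real method.

```go
package main

import "fmt"

// maxUint64 mirrors mathutil.MaxUint64 from the patch.
func maxUint64(a, b uint64) uint64 {
	if a > b {
		return a
	}
	return b
}

// defaultVal clamps the divisor to at least 1, so a sample that already
// covers the estimated NDV cannot trigger a division-by-zero panic.
func defaultVal(remaining, estimateNDV, sampleNDV, onlyOnce uint64) uint64 {
	return remaining / maxUint64(1, estimateNDV-sampleNDV+onlyOnce)
}

func main() {
	// estimateNDV == sampleNDV with no only-once items: the unguarded
	// divisor would be 0 and the division would panic.
	fmt.Println(defaultVal(100, 3000, 3000, 0)) // 100
}
```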
15 changes: 13 additions & 2 deletions statistics/table.go
@@ -16,6 +16,7 @@ package statistics
import (
"fmt"
"math"
"sort"
"strings"
"sync"

@@ -98,12 +99,22 @@ func (t *Table) Copy() *Table {
func (t *Table) String() string {
strs := make([]string, 0, len(t.Columns)+1)
strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.PhysicalID, t.Count))
cols := make([]*Column, 0, len(t.Columns))
for _, col := range t.Columns {
strs = append(strs, col.String())
cols = append(cols, col)
}
for _, col := range t.Indices {
sort.Slice(cols, func(i, j int) bool { return cols[i].ID < cols[j].ID })
for _, col := range cols {
strs = append(strs, col.String())
}
idxs := make([]*Index, 0, len(t.Indices))
for _, idx := range t.Indices {
idxs = append(idxs, idx)
}
sort.Slice(idxs, func(i, j int) bool { return idxs[i].ID < idxs[j].ID })
for _, idx := range idxs {
strs = append(strs, idx.String())
}
return strings.Join(strs, "\n")
}

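Columns and Indices are iterated as maps here, and Go randomizes map iteration order, so the old Table.String() could emit column:1 and column:2 in either order; that is exactly why the removed test code had to accept two permutations. Sorting by ID makes the rendering stable, which in turn lets TestFastAnalyze assert on a single expected string. A self-contained sketch of the same pattern, with a hypothetical col type standing in for statistics.Column:

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// col is a hypothetical stand-in for statistics.Column.
type col struct {
	ID   int64
	Desc string
}

// stableString renders map values in ascending ID order, so the output no
// longer depends on Go's randomized map iteration order.
func stableString(cols map[int64]*col) string {
	sorted := make([]*col, 0, len(cols))
	for _, c := range cols {
		sorted = append(sorted, c)
	}
	sort.Slice(sorted, func(i, j int) bool { return sorted[i].ID < sorted[j].ID })
	parts := make([]string, 0, len(sorted))
	for _, c := range sorted {
		parts = append(parts, c.Desc)
	}
	return strings.Join(parts, "\n")
}

func main() {
	cols := map[int64]*col{2: {2, "column:2 ..."}, 1: {1, "column:1 ..."}}
	fmt.Println(stableString(cols)) // always prints column:1 before column:2
}
```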
5 changes: 3 additions & 2 deletions store/mockstore/mocktikv/rpc.go
@@ -18,6 +18,7 @@ import (
"context"
"fmt"
"io"
"math"
"strconv"
"time"

@@ -874,8 +875,8 @@ func (c *RPCClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.R
// DebugGetRegionProperties is for fast analyze in mock tikv.
case tikvrpc.CmdDebugGetRegionProperties:
r := req.DebugGetRegionProperties
region, _ := c.Cluster.GetRegionByID(r.RegionId)
scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: region.StartKey, EndKey: region.EndKey})
region, _ := c.Cluster.GetRegion(r.RegionId)
scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: MvccKey(region.StartKey).Raw(), EndKey: MvccKey(region.EndKey).Raw(), Version: math.MaxUint64, Limit: math.MaxUint32})
resp.DebugGetRegionProperties = &debugpb.GetRegionPropertiesResponse{
Props: []*debugpb.Property{{
Name: "mvcc.num_rows",
