executor, stats: fix fast analyze bugs (#10680) (#10691)
alivxxx authored and zz-jason committed Jun 4, 2019
1 parent eef187b commit 3daeff5
Showing 5 changed files with 48 additions and 52 deletions.
10 changes: 8 additions & 2 deletions executor/analyze.go
@@ -702,7 +702,6 @@ func (e *AnalyzeFastExec) getNextSampleKey(bo *tikv.Backoffer, startKey kv.Key)
func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error) {
// Do get regions row count.
bo := tikv.NewBackoffer(context.Background(), 500)
e.rowCount = 0
needRebuildForRoutine := make([]bool, e.concurrency)
errs := make([]error, e.concurrency)
sampTasksForRoutine := make([][]*AnalyzeFastTask, e.concurrency)
@@ -734,6 +733,13 @@ func (e *AnalyzeFastExec) buildSampTask() (needRebuild bool, err error)
if err != nil {
return false, err
}
e.rowCount = 0
for _, task := range e.sampTasks {
cnt := task.EndOffset - task.BeginOffset
task.BeginOffset = e.rowCount
task.EndOffset = e.rowCount + cnt
e.rowCount += cnt
}
for {
// Search for the region which contains the targetKey.
loc, err := e.cache.LocateKey(bo, targetKey)
@@ -949,7 +955,7 @@ func (e *AnalyzeFastExec) handleSampTasks(bo *tikv.Backoffer, workID int, err *e
keys = append(keys, tablecodec.EncodeRowKeyWithHandle(tableID, randKey))
}

var kvMap map[string][]byte
kvMap := make(map[string][]byte, len(keys))
for _, key := range keys {
var iter kv.Iterator
iter, *err = snapshot.Iter(key, endKey)
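In executor/analyze.go, e.rowCount is no longer zeroed at the top of buildSampTask; it is reset only after the already-collected sample tasks have been renumbered into one contiguous offset range, which keeps the retained tasks' offsets and the running row count consistent when the task list has to be rebuilt. The last hunk also replaces a nil map declaration with make, since writing to a nil map panics. The sketch below restates the renormalization as a standalone helper; the task type and helper name are hypothetical, only the offset arithmetic comes from the patch.

```go
package main

import "fmt"

// task is a stand-in for AnalyzeFastTask; it keeps only the sampled row range.
type task struct{ BeginOffset, EndOffset uint64 }

// renumberTasks rewrites retained tasks so their offsets form one contiguous
// range starting at zero, and returns the total number of rows they cover.
// It mirrors the loop the patch adds to buildSampTask before further regions
// are scanned.
func renumberTasks(tasks []*task) (rowCount uint64) {
	for _, t := range tasks {
		cnt := t.EndOffset - t.BeginOffset
		t.BeginOffset = rowCount
		t.EndOffset = rowCount + cnt
		rowCount += cnt
	}
	return rowCount
}

func main() {
	tasks := []*task{{BeginOffset: 100, EndOffset: 160}, {BeginOffset: 400, EndOffset: 430}}
	fmt.Println(renumberTasks(tasks), *tasks[0], *tasks[1]) // 90 {0 60} {60 90}
}
```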
68 changes: 23 additions & 45 deletions executor/analyze_test.go
@@ -237,9 +237,6 @@ func (s *testSuite1) TestFastAnalyze(c *C) {
tk.MustExec("create table t(a int primary key, b int, index index_b(b))")
tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
for i := 0; i < 3000; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
}
tblInfo, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tid := tblInfo.Meta().ID
@@ -248,56 +245,35 @@ func (s *testSuite1) TestFastAnalyze(c *C) {
splitKeys := generateTableSplitKeyForInt(tid, []int{600, 1200, 1800, 2400})
manipulateCluster(cluster, splitKeys)

for i := 0; i < 3000; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
}
tk.MustExec("analyze table t with 5 buckets")

is := executor.GetInfoSchema(tk.Se.(sessionctx.Context))
table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
c.Assert(err, IsNil)
tableInfo := table.Meta()
tbl := dom.StatsHandle().GetTableStats(tableInfo)
sTbl := fmt.Sprintln(tbl)
matched := false
if sTbl == "Table:39 Count:3000\n"+
c.Assert(tbl.String(), Equals, "Table:39 Count:3000\n"+
"column:1 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+
"column:2 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1\n"+
"index:1 ndv:3000\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" ||
sTbl == "Table:39 Count:3000\n"+
"column:2 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"column:1 ndv:3000 totColSize:0\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n"+
"index:1 ndv:3000\n"+
"num: 603 lower_bound: 6 upper_bound: 612 repeats: 1\n"+
"num: 603 lower_bound: 621 upper_bound: 1205 repeats: 1\n"+
"num: 603 lower_bound: 1207 upper_bound: 1830 repeats: 1\n"+
"num: 603 lower_bound: 1831 upper_bound: 2387 repeats: 1\n"+
"num: 588 lower_bound: 2390 upper_bound: 2997 repeats: 1\n" {
matched = true
}
c.Assert(matched, Equals, true)
"num: 603 lower_bound: 0 upper_bound: 658 repeats: 1\n"+
"num: 603 lower_bound: 663 upper_bound: 1248 repeats: 1\n"+
"num: 603 lower_bound: 1250 upper_bound: 1823 repeats: 1\n"+
"num: 603 lower_bound: 1830 upper_bound: 2379 repeats: 1\n"+
"num: 588 lower_bound: 2380 upper_bound: 2998 repeats: 1")
}

func (s *testSuite1) TestAnalyzeIncremental(c *C) {
@@ -415,7 +391,7 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) {
tk := testkit.NewTestKit(c, s.store)
tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int primary key, b int, index index_b(b))")
tk.MustExec("create table t(a int primary key)")
tk.MustExec("set @@session.tidb_enable_fast_analyze=1")
tk.MustExec("set @@session.tidb_build_stats_concurrency=1")
tblInfo, err := s.dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
@@ -425,12 +401,14 @@ func (s *testFastAnalyze) TestFastAnalyzeRetryRowCount(c *C) {
splitKeys := generateTableSplitKeyForInt(tid, []int{6, 12, 18, 24, 30})
regionIDs := manipulateCluster(s.cluster, splitKeys)
for i := 0; i < 30; i++ {
tk.MustExec(fmt.Sprintf("insert into t values (%d, %d)", i, i))
tk.MustExec(fmt.Sprintf("insert into t values (%d)", i))
}
s.cli.setFailRegion(regionIDs[4])
tk.MustExec("analyze table t")
// 4 regions will be sampled, and it will retry the last failed region.
c.Assert(s.cli.mu.count, Equals, int64(5))
row := tk.MustQuery(`show stats_meta where db_name = "test" and table_name = "t"`).Rows()[0]
c.Assert(row[5], Equals, "30")
}

func (s *testSuite1) TestFailedAnalyzeRequest(c *C) {
2 changes: 1 addition & 1 deletion statistics/cmsketch.go
@@ -145,7 +145,7 @@ func (c *CMSketch) calculateDefaultVal(helper *topNHelper, estimateNDV, scaleRat
c.defaultValue = 1
} else {
estimateRemainingCount := rowCount - (helper.sampleSize-uint64(helper.onlyOnceItems))*scaleRatio
c.defaultValue = estimateRemainingCount / (estimateNDV - uint64(sampleNDV) + helper.onlyOnceItems)
c.defaultValue = estimateRemainingCount / mathutil.MaxUint64(1, estimateNDV-uint64(sampleNDV)+helper.onlyOnceItems)
}
}

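In calculateDefaultVal, the divisor estimateNDV - uint64(sampleNDV) + helper.onlyOnceItems is an unsigned expression that can evaluate to zero for some sample distributions, and integer division by zero panics at runtime; the patch clamps it to at least 1 with mathutil.MaxUint64. Below is a minimal sketch of the guard; the function and parameter names are illustrative stand-ins for the fields used in the real method.

```go
package main

import "fmt"

// maxUint64 mirrors mathutil.MaxUint64 from the patch.
func maxUint64(a, b uint64) uint64 {
	if a > b {
		return a
	}
	return b
}

// defaultVal clamps the divisor to at least 1, so a sample that already
// covers the estimated NDV cannot trigger a division-by-zero panic.
func defaultVal(remaining, estimateNDV, sampleNDV, onlyOnce uint64) uint64 {
	return remaining / maxUint64(1, estimateNDV-sampleNDV+onlyOnce)
}

func main() {
	// estimateNDV == sampleNDV with no only-once items: the unguarded
	// divisor would be 0 and the division would panic.
	fmt.Println(defaultVal(100, 3000, 3000, 0)) // 100
}
```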
15 changes: 13 additions & 2 deletions statistics/table.go
@@ -16,6 +16,7 @@ package statistics
import (
"fmt"
"math"
"sort"
"strings"
"sync"

@@ -98,12 +99,22 @@ func (t *Table) Copy() *Table {
func (t *Table) String() string {
strs := make([]string, 0, len(t.Columns)+1)
strs = append(strs, fmt.Sprintf("Table:%d Count:%d", t.PhysicalID, t.Count))
cols := make([]*Column, 0, len(t.Columns))
for _, col := range t.Columns {
strs = append(strs, col.String())
cols = append(cols, col)
}
for _, col := range t.Indices {
sort.Slice(cols, func(i, j int) bool { return cols[i].ID < cols[j].ID })
for _, col := range cols {
strs = append(strs, col.String())
}
idxs := make([]*Index, 0, len(t.Indices))
for _, idx := range t.Indices {
idxs = append(idxs, idx)
}
sort.Slice(idxs, func(i, j int) bool { return idxs[i].ID < idxs[j].ID })
for _, idx := range idxs {
strs = append(strs, idx.String())
}
return strings.Join(strs, "\n")
}

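Columns and Indices are iterated as maps here, and Go randomizes map iteration order, so the old Table.String() could emit column:1 and column:2 in either order; that is exactly why the removed test code had to accept two permutations. Sorting by ID makes the rendering stable, which in turn lets TestFastAnalyze assert on a single expected string. A self-contained sketch of the same pattern, with a hypothetical col type standing in for statistics.Column:

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// col is a hypothetical stand-in for statistics.Column.
type col struct {
	ID   int64
	Desc string
}

// stableString renders map values in ascending ID order, so the output no
// longer depends on Go's randomized map iteration order.
func stableString(cols map[int64]*col) string {
	sorted := make([]*col, 0, len(cols))
	for _, c := range cols {
		sorted = append(sorted, c)
	}
	sort.Slice(sorted, func(i, j int) bool { return sorted[i].ID < sorted[j].ID })
	parts := make([]string, 0, len(sorted))
	for _, c := range sorted {
		parts = append(parts, c.Desc)
	}
	return strings.Join(parts, "\n")
}

func main() {
	cols := map[int64]*col{2: {2, "column:2 ..."}, 1: {1, "column:1 ..."}}
	fmt.Println(stableString(cols)) // always prints column:1 before column:2
}
```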
5 changes: 3 additions & 2 deletions store/mockstore/mocktikv/rpc.go
@@ -18,6 +18,7 @@ import (
"context"
"fmt"
"io"
"math"
"strconv"
"time"

@@ -874,8 +875,8 @@ func (c *RPCClient) SendRequest(ctx context.Context, addr string, req *tikvrpc.R
// DebugGetRegionProperties is for fast analyze in mock tikv.
case tikvrpc.CmdDebugGetRegionProperties:
r := req.DebugGetRegionProperties
region, _ := c.Cluster.GetRegionByID(r.RegionId)
scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: region.StartKey, EndKey: region.EndKey})
region, _ := c.Cluster.GetRegion(r.RegionId)
scanResp := handler.handleKvScan(&kvrpcpb.ScanRequest{StartKey: MvccKey(region.StartKey).Raw(), EndKey: MvccKey(region.EndKey).Raw(), Version: math.MaxUint64, Limit: math.MaxUint32})
resp.DebugGetRegionProperties = &debugpb.GetRegionPropertiesResponse{
Props: []*debugpb.Property{{
Name: "mvcc.num_rows",
