diff --git a/executor/analyze.go b/executor/analyze.go index 5c27ba9e17d39..d52a0afeafafe 100644 --- a/executor/analyze.go +++ b/executor/analyze.go @@ -1075,13 +1075,13 @@ func (e *AnalyzeTestFastExec) TestFastSample() error { type analyzeIndexIncrementalExec struct { AnalyzeIndexExec - index *statistics.Index + oldHist *statistics.Histogram + oldCMS *statistics.CMSketch } func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult { - idx := idxExec.index - highBound := idx.Histogram.GetUpper(idx.Len() - 1) - values, err := codec.Decode(highBound.GetBytes(), len(idxExec.idxInfo.Columns)) + startPos := idxExec.oldHist.GetUpper(idxExec.oldHist.Len() - 1) + values, err := codec.DecodeRange(startPos.GetBytes(), len(idxExec.idxInfo.Columns)) if err != nil { return analyzeResult{Err: err, job: idxExec.job} } @@ -1090,16 +1090,12 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult if err != nil { return analyzeResult{Err: err, job: idxExec.job} } - oldHist, oldCMS, err := idx.RemoveUpperBound(idxExec.ctx.GetSessionVars().StmtCtx, values) + hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, idxExec.oldHist, hist, int(idxExec.maxNumBuckets)) if err != nil { return analyzeResult{Err: err, job: idxExec.job} } - hist, err = statistics.MergeHistograms(idxExec.ctx.GetSessionVars().StmtCtx, oldHist, hist, int(idxExec.maxNumBuckets)) - if err != nil { - return analyzeResult{Err: err, job: idxExec.job} - } - if oldCMS != nil && cms != nil { - err = cms.MergeCMSketch(oldCMS) + if idxExec.oldCMS != nil && cms != nil { + err = cms.MergeCMSketch4IncrementalAnalyze(idxExec.oldCMS) if err != nil { return analyzeResult{Err: err, job: idxExec.job} } @@ -1120,26 +1116,24 @@ func analyzeIndexIncremental(idxExec *analyzeIndexIncrementalExec) analyzeResult type analyzePKIncrementalExec struct { AnalyzeColumnsExec - pkStats *statistics.Column + oldHist *statistics.Histogram } func analyzePKIncremental(colExec 
*analyzePKIncrementalExec) analyzeResult { - pkStats := colExec.pkStats - high := pkStats.GetUpper(pkStats.Len() - 1) var maxVal types.Datum if mysql.HasUnsignedFlag(colExec.pkInfo.Flag) { maxVal = types.NewUintDatum(math.MaxUint64) } else { maxVal = types.NewIntDatum(math.MaxInt64) } - ran := ranger.Range{LowVal: []types.Datum{*high}, LowExclude: true, HighVal: []types.Datum{maxVal}} + startPos := *colExec.oldHist.GetUpper(colExec.oldHist.Len() - 1) + ran := ranger.Range{LowVal: []types.Datum{startPos}, LowExclude: true, HighVal: []types.Datum{maxVal}} hists, _, err := colExec.buildStats([]*ranger.Range{&ran}) if err != nil { return analyzeResult{Err: err, job: colExec.job} } hist := hists[0] - oldHist := pkStats.Histogram.Copy() - hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, oldHist, hist, int(colExec.maxNumBuckets)) + hist, err = statistics.MergeHistograms(colExec.ctx.GetSessionVars().StmtCtx, colExec.oldHist, hist, int(colExec.maxNumBuckets)) if err != nil { return analyzeResult{Err: err, job: colExec.job} } diff --git a/executor/analyze_test.go b/executor/analyze_test.go index c33880d4f4b90..d2e579286dd3d 100644 --- a/executor/analyze_test.go +++ b/executor/analyze_test.go @@ -24,9 +24,13 @@ import ( "github.com/pingcap/tidb/executor" "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/sessionctx" + "github.com/pingcap/tidb/statistics" + "github.com/pingcap/tidb/statistics/handle" "github.com/pingcap/tidb/store/mockstore" "github.com/pingcap/tidb/store/mockstore/mocktikv" "github.com/pingcap/tidb/table" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/codec" "github.com/pingcap/tidb/util/testkit" ) @@ -303,4 +307,36 @@ func (s *testSuite1) TestAnalyzeIncremental(c *C) { tk.MustExec("analyze incremental table t index") // Result should not change. 
tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 2 1 2 2", "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2")) + + // Test analyze incremental with feedback. + tk.MustExec("insert into t values (3,3)") + oriProbability := statistics.FeedbackProbability.Load() + defer func() { + statistics.FeedbackProbability.Store(oriProbability) + }() + statistics.FeedbackProbability.Store(1) + is := s.dom.InfoSchema() + table, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t")) + c.Assert(err, IsNil) + tblInfo := table.Meta() + tk.MustQuery("select * from t use index(idx) where b = 3") + tk.MustQuery("select * from t where a > 1") + h := s.dom.StatsHandle() + c.Assert(h.DumpStatsDeltaToKV(handle.DumpAll), IsNil) + c.Assert(h.DumpStatsFeedbackToKV(), IsNil) + c.Assert(h.HandleUpdateStats(is), IsNil) + c.Assert(h.Update(is), IsNil) + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 3 0 2 2147483647", "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2")) + tblStats := h.GetTableStats(tblInfo) + val, err := codec.EncodeKey(tk.Se.GetSessionVars().StmtCtx, nil, types.NewIntDatum(3)) + c.Assert(err, IsNil) + c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(val), Equals, uint64(1)) + c.Assert(statistics.IsAnalyzed(tblStats.Indices[tblInfo.Indices[0].ID].Flag), IsFalse) + c.Assert(statistics.IsAnalyzed(tblStats.Columns[tblInfo.Columns[0].ID].Flag), IsFalse) + + tk.MustExec("analyze incremental table t index") + tk.MustQuery("show stats_buckets").Check(testkit.Rows("test t a 0 0 1 1 1 1", "test t a 0 1 2 1 2 2", "test t a 0 2 3 1 3 3", + "test t idx 1 0 1 1 1 1", "test t idx 1 1 2 1 2 2", "test t idx 1 2 3 1 3 3")) + tblStats = h.GetTableStats(tblInfo) + c.Assert(tblStats.Indices[tblInfo.Indices[0].ID].CMSketch.QueryBytes(val), Equals, uint64(1)) } diff --git a/executor/builder.go b/executor/builder.go index 2ccdc3584b865..d8f5d514e8e62 100644 --- a/executor/builder.go 
+++ b/executor/builder.go @@ -1398,18 +1398,28 @@ func (b *executorBuilder) buildAnalyzeIndexIncremental(task plannercore.AnalyzeI return analyzeTask } idx, ok := statsTbl.Indices[task.IndexInfo.ID] - // TODO: If the index contains feedback, we may use other strategy. - if !ok || idx.Len() == 0 || idx.ContainsFeedback() { + if !ok || idx.Len() == 0 || idx.LastAnalyzePos.IsNull() { return analyzeTask } - exec := analyzeTask.idxExec - if idx.CMSketch != nil { - width, depth := idx.CMSketch.GetWidthAndDepth() - exec.analyzePB.IdxReq.CmsketchWidth = &width - exec.analyzePB.IdxReq.CmsketchDepth = &depth + var oldHist *statistics.Histogram + if statistics.IsAnalyzed(idx.Flag) { + exec := analyzeTask.idxExec + if idx.CMSketch != nil { + width, depth := idx.CMSketch.GetWidthAndDepth() + exec.analyzePB.IdxReq.CmsketchWidth = &width + exec.analyzePB.IdxReq.CmsketchDepth = &depth + } + oldHist = idx.Histogram.Copy() + } else { + _, bktID := idx.LessRowCountWithBktIdx(idx.LastAnalyzePos) + if bktID == 0 { + return analyzeTask + } + oldHist = idx.TruncateHistogram(bktID) } + oldHist = oldHist.RemoveUpperBound() analyzeTask.taskType = idxIncrementalTask - analyzeTask.idxIncrementalExec = &analyzeIndexIncrementalExec{AnalyzeIndexExec: *analyzeTask.idxExec, index: idx} + analyzeTask.idxIncrementalExec = &analyzeIndexIncrementalExec{AnalyzeIndexExec: *analyzeTask.idxExec, oldHist: oldHist, oldCMS: idx.CMSketch} analyzeTask.job = &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: "analyze incremental index " + task.IndexInfo.Name.O} return analyzeTask } @@ -1458,13 +1468,28 @@ func (b *executorBuilder) buildAnalyzePKIncremental(task plannercore.AnalyzeColu return analyzeTask } col, ok := statsTbl.Columns[task.PKInfo.ID] - // TODO: If the primary key contains feedback, we may use other strategy. 
- if !ok || col.Len() == 0 || col.ContainsFeedback() { + if !ok || col.Len() == 0 || col.LastAnalyzePos.IsNull() { return analyzeTask } + var oldHist *statistics.Histogram + if statistics.IsAnalyzed(col.Flag) { + oldHist = col.Histogram.Copy() + } else { + d, err := col.LastAnalyzePos.ConvertTo(b.ctx.GetSessionVars().StmtCtx, col.Tp) + if err != nil { + b.err = err + return nil + } + _, bktID := col.LessRowCountWithBktIdx(d) + if bktID == 0 { + return analyzeTask + } + oldHist = col.TruncateHistogram(bktID) + oldHist.NDV = int64(oldHist.TotalRowCount()) + } exec := analyzeTask.colExec analyzeTask.taskType = pkIncrementalTask - analyzeTask.colIncrementalExec = &analyzePKIncrementalExec{AnalyzeColumnsExec: *exec, pkStats: col} + analyzeTask.colIncrementalExec = &analyzePKIncrementalExec{AnalyzeColumnsExec: *exec, oldHist: oldHist} analyzeTask.job = &statistics.AnalyzeJob{DBName: task.DBName, TableName: task.TableName, PartitionName: task.PartitionName, JobInfo: "analyze incremental primary key"} return analyzeTask } diff --git a/executor/executor_test.go b/executor/executor_test.go index ae1bd548eed47..c21b8104fae28 100644 --- a/executor/executor_test.go +++ b/executor/executor_test.go @@ -2514,6 +2514,7 @@ func (s *testSuite1) SetUpSuite(c *C) { c.Assert(err, IsNil) s.dom, err = session.BootstrapSession(s.store) c.Assert(err, IsNil) + s.dom.SetStatsUpdating(true) } func (s *testSuite1) TearDownSuite(c *C) { diff --git a/session/bootstrap.go b/session/bootstrap.go index 34a0503a051a7..94beedfc41ba4 100644 --- a/session/bootstrap.go +++ b/session/bootstrap.go @@ -171,6 +171,7 @@ const ( stats_ver bigint(64) NOT NULL DEFAULT 0, flag bigint(64) NOT NULL DEFAULT 0, correlation double NOT NULL DEFAULT 0, + last_analyze_pos blob DEFAULT NULL, unique index tbl(table_id, is_index, hist_id) );` @@ -328,6 +329,7 @@ const ( version28 = 28 version29 = 29 version30 = 30 + version31 = 31 ) func checkBootstrapped(s Session) (bool, error) { @@ -507,6 +509,10 @@ func upgrade(s 
Session) { upgradeToVer30(s) } + if ver < version31 { + upgradeToVer31(s) + } + updateBootstrapVer(s) _, err = s.Execute(context.Background(), "COMMIT") @@ -799,6 +805,10 @@ func upgradeToVer30(s Session) { mustExecute(s, CreateStatsTopNTable) } +func upgradeToVer31(s Session) { + doReentrantDDL(s, "ALTER TABLE mysql.stats_histograms ADD COLUMN `last_analyze_pos` blob default null", infoschema.ErrColumnExists) +} + // updateBootstrapVer updates bootstrap version variable in mysql.TiDB table. func updateBootstrapVer(s Session) { // Update bootstrap version. diff --git a/session/session.go b/session/session.go index 97f073c239130..1935af30a6a55 100644 --- a/session/session.go +++ b/session/session.go @@ -1559,7 +1559,7 @@ func createSessionWithDomain(store kv.Storage, dom *domain.Domain) (*session, er const ( notBootstrapped = 0 - currentBootstrapVersion = 30 + currentBootstrapVersion = 31 ) func getStoreBootstrapVersion(store kv.Storage) int64 { diff --git a/statistics/cmsketch.go b/statistics/cmsketch.go index 6c58fe47abab9..747337c1d50ee 100644 --- a/statistics/cmsketch.go +++ b/statistics/cmsketch.go @@ -298,6 +298,31 @@ func (c *CMSketch) MergeCMSketch(rc *CMSketch) error { return nil } +// MergeCMSketch4IncrementalAnalyze merges two CM Sketches for incremental analyze. Since there is no value +// that appears partially in `c` and `rc` for incremental analyze, it uses `max` to merge them. +// Here is a simple proof: when we query from the CM sketch, we use the `min` to get the answer: +// (1): For values that only appear in `c`, using `max` to merge them affects the `min` query result less than using `sum`; +// (2): For values that only appear in `rc`, it is the same as condition (1); +// (3): For values that appear in both `c` and `rc`, if they do not appear partially in `c` and `rc`, for example, +// if `v` appears 5 times in the table, it can appear 5 times in `c` and 3 times in `rc`, then `max` also gives the correct answer. 
+// So in fact, if we can know the number of appearances of each value in the first place, it is better to use `max` to construct the CM sketch rather than `sum`. +func (c *CMSketch) MergeCMSketch4IncrementalAnalyze(rc *CMSketch) error { + if c.depth != rc.depth || c.width != rc.width { + return errors.New("Dimensions of Count-Min Sketch should be the same") + } + if c.topN != nil || rc.topN != nil { + return errors.New("CMSketch with Top-N does not support merge") + } + for i := range c.table { + c.count = 0 + for j := range c.table[i] { + c.table[i][j] = mathutil.MaxUint32(c.table[i][j], rc.table[i][j]) + c.count += uint64(c.table[i][j]) + } + } + return nil +} + // CMSketchToProto converts CMSketch to its protobuf representation. func CMSketchToProto(c *CMSketch) *tipb.CMSketch { protoSketch := &tipb.CMSketch{Rows: make([]*tipb.CMSketchRow, c.depth)} diff --git a/statistics/handle/bootstrap.go b/statistics/handle/bootstrap.go index a461bc6d6911d..3a9109454495b 100644 --- a/statistics/handle/bootstrap.go +++ b/statistics/handle/bootstrap.go @@ -109,7 +109,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat terror.Log(errors.Trace(err)) } hist := statistics.NewHistogram(id, ndv, nullCount, version, types.NewFieldType(mysql.TypeBlob), chunk.InitialCapacity, 0) - table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8)} + table.Indices[hist.ID] = &statistics.Index{Histogram: *hist, CMSketch: cms, Info: idxInfo, StatsVer: row.GetInt64(8), Flag: row.GetInt64(10), LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob))} } else { var colInfo *model.ColumnInfo for _, col := range tbl.Meta().Columns { @@ -124,11 +124,13 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat hist := statistics.NewHistogram(id, ndv, nullCount, version, &colInfo.FieldType, 0, totColSize) hist.Correlation = row.GetFloat64(9) table.Columns[hist.ID] = 
&statistics.Column{ - Histogram: *hist, - PhysicalID: table.PhysicalID, - Info: colInfo, - Count: nullCount, - IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), + Histogram: *hist, + PhysicalID: table.PhysicalID, + Info: colInfo, + Count: nullCount, + IsHandle: tbl.Meta().PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), + Flag: row.GetInt64(10), + LastAnalyzePos: row.GetDatum(11, types.NewFieldType(mysql.TypeBlob)), } } } @@ -137,7 +139,7 @@ func (h *Handle) initStatsHistograms4Chunk(is infoschema.InfoSchema, tables Stat func (h *Handle) initStatsHistograms(is infoschema.InfoSchema, tables StatsCache) error { h.mu.Lock() defer h.mu.Unlock() - sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation from mysql.stats_histograms" + sql := "select HIGH_PRIORITY table_id, is_index, hist_id, distinct_count, version, null_count, cm_sketch, tot_col_size, stats_ver, correlation, flag, last_analyze_pos from mysql.stats_histograms" rc, err := h.mu.ctx.(sqlexec.SQLExecutor).Execute(context.TODO(), sql) if len(rc) > 0 { defer terror.Call(rc[0].Close) diff --git a/statistics/handle/handle.go b/statistics/handle/handle.go index 6cfbf16450bcc..912dbed1e6865 100644 --- a/statistics/handle/handle.go +++ b/statistics/handle/handle.go @@ -342,7 +342,8 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t nullCount := row.GetInt64(5) idx := table.Indices[histID] errorRate := statistics.ErrorRate{} - if statistics.IsAnalyzed(row.GetInt64(8)) { + flag := row.GetInt64(8) + if statistics.IsAnalyzed(flag) { h.mu.Lock() h.mu.rateMap.clear(table.PhysicalID, histID, true) h.mu.Unlock() @@ -362,7 +363,7 @@ func (h *Handle) indexStatsFromStorage(row chunk.Row, table *statistics.Table, t if err != nil { return errors.Trace(err) } - idx = &statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7)} + idx = 
&statistics.Index{Histogram: *hg, CMSketch: cms, Info: idxInfo, ErrorRate: errorRate, StatsVer: row.GetInt64(7), Flag: flag, LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))} } break } @@ -383,7 +384,8 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table, correlation := row.GetFloat64(9) col := table.Columns[histID] errorRate := statistics.ErrorRate{} - if statistics.IsAnalyzed(row.GetInt64(8)) { + flag := row.GetInt64(8) + if statistics.IsAnalyzed(flag) { h.mu.Lock() h.mu.rateMap.clear(table.PhysicalID, histID, false) h.mu.Unlock() @@ -410,12 +412,14 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table, return errors.Trace(err) } col = &statistics.Column{ - PhysicalID: table.PhysicalID, - Histogram: *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize), - Info: colInfo, - Count: count + nullCount, - ErrorRate: errorRate, - IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), + PhysicalID: table.PhysicalID, + Histogram: *statistics.NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize), + Info: colInfo, + Count: count + nullCount, + ErrorRate: errorRate, + IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), + Flag: flag, + LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)), } col.Histogram.Correlation = correlation break @@ -430,13 +434,15 @@ func (h *Handle) columnStatsFromStorage(row chunk.Row, table *statistics.Table, return errors.Trace(err) } col = &statistics.Column{ - PhysicalID: table.PhysicalID, - Histogram: *hg, - Info: colInfo, - CMSketch: cms, - Count: int64(hg.TotalRowCount()), - ErrorRate: errorRate, - IsHandle: tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.Flag), + PhysicalID: table.PhysicalID, + Histogram: *hg, + Info: colInfo, + CMSketch: cms, + Count: int64(hg.TotalRowCount()), + ErrorRate: errorRate, + IsHandle: tableInfo.PKIsHandle && 
mysql.HasPriKeyFlag(colInfo.Flag), + Flag: flag, + LastAnalyzePos: row.GetDatum(10, types.NewFieldType(mysql.TypeBlob)), } break } @@ -478,7 +484,7 @@ func (h *Handle) tableStatsFromStorage(tableInfo *model.TableInfo, physicalID in table = table.Copy() } table.Pseudo = false - selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation from mysql.stats_histograms where table_id = %d", physicalID) + selSQL := fmt.Sprintf("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %d", physicalID) var rows []chunk.Row if historyStatsExec != nil { rows, _, err = historyStatsExec.ExecRestrictedSQLWithSnapshot(nil, selSQL) @@ -569,6 +575,7 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg return } sc := h.mu.ctx.GetSessionVars().StmtCtx + var lastAnalyzePos []byte for i := range hg.Buckets { count := hg.Buckets[i].Count if i > 0 { @@ -579,6 +586,9 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg if err != nil { return } + if i == len(hg.Buckets)-1 { + lastAnalyzePos = upperBound.GetBytes() + } var lowerBound types.Datum lowerBound, err = hg.GetLower(i).ConvertTo(sc, types.NewFieldType(mysql.TypeBlob)) if err != nil { @@ -590,6 +600,13 @@ func (h *Handle) SaveStatsToStorage(tableID int64, count int64, isIndex int, hg return } } + if isAnalyzed == 1 && len(lastAnalyzePos) > 0 { + sql = fmt.Sprintf("update mysql.stats_histograms set last_analyze_pos = X'%X' where table_id = %d and is_index = %d and hist_id = %d", lastAnalyzePos, tableID, isIndex, hg.ID) + _, err = exec.Execute(ctx, sql) + if err != nil { + return + } + } return } diff --git a/statistics/handle/update.go b/statistics/handle/update.go index 24ad2fe7e1c85..f5dd421adc4d5 100644 --- a/statistics/handle/update.go +++ b/statistics/handle/update.go @@ 
-436,6 +436,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { newIdx.CMSketch = statistics.UpdateCMSketch(idx.CMSketch, eqFB) newIdx.Histogram = *statistics.UpdateHistogram(&idx.Histogram, &statistics.QueryFeedback{Feedback: ranFB}) newIdx.Histogram.PreCalculateScalar() + newIdx.Flag = statistics.ResetAnalyzeFlag(newIdx.Flag) newTblStats.Indices[fb.Hist.ID] = &newIdx } else { col, ok := tblStats.Columns[fb.Hist.ID] @@ -448,6 +449,7 @@ func (h *Handle) UpdateStatsByLocalFeedback(is infoschema.InfoSchema) { newFB := &statistics.QueryFeedback{Feedback: ranFB} newFB = newFB.DecodeIntValues() newCol.Histogram = *statistics.UpdateHistogram(&col.Histogram, newFB) + newCol.Flag = statistics.ResetAnalyzeFlag(newCol.Flag) newTblStats.Columns[fb.Hist.ID] = &newCol } h.UpdateTableStats([]*statistics.Table{newTblStats}, nil) diff --git a/statistics/histogram.go b/statistics/histogram.go index 31b70eabecd79..951070a45fe20 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -34,7 +34,6 @@ import ( "github.com/pingcap/tidb/util/logutil" "github.com/pingcap/tidb/util/ranger" "github.com/pingcap/tipb/go-tipb" - "github.com/spaolacci/murmur3" "go.uber.org/zap" ) @@ -197,7 +196,7 @@ const ( Version1 = 1 ) -// AnalyzeFlag is one for column flag. We can use IsAnalyzed to check whether this column is analyzed or not. +// AnalyzeFlag is set when the statistics comes from analyze and has not been modified by feedback. const AnalyzeFlag = 1 // IsAnalyzed checks whether this flag contains AnalyzeFlag. @@ -205,6 +204,11 @@ func IsAnalyzed(flag int64) bool { return (flag & AnalyzeFlag) > 0 } +// ResetAnalyzeFlag resets the AnalyzeFlag because it has been modified by feedback. +func ResetAnalyzeFlag(flag int64) int64 { + return flag &^ AnalyzeFlag +} + // ValueToString converts a possible encoded value to a formatted string. If the value is encoded, then // idxCols equals to number of origin values, else idxCols is 0. 
func ValueToString(value *types.Datum, idxCols int) (string, error) { @@ -566,17 +570,6 @@ func (hg *Histogram) outOfRange(val types.Datum) bool { chunk.Compare(hg.Bounds.GetRow(hg.Bounds.NumRows()-1), 0, &val) < 0 } -// ContainsFeedback checks if the histogram contains feedback updates. -// We can test it from the `repeat` field because only feedback will update it to 0. -func (hg *Histogram) ContainsFeedback() bool { - for _, bkt := range hg.Buckets { - if bkt.Repeat == 0 { - return true - } - } - return false -} - // Copy deep copies the histogram. func (hg *Histogram) Copy() *Histogram { newHist := *hg @@ -588,6 +581,22 @@ func (hg *Histogram) Copy() *Histogram { return &newHist } +// RemoveUpperBound removes the upper bound from the histogram. +// It is used when merging stats for incremental analyze. +func (hg *Histogram) RemoveUpperBound() *Histogram { + hg.Buckets[hg.Len()-1].Count -= hg.Buckets[hg.Len()-1].Repeat + hg.Buckets[hg.Len()-1].Repeat = 0 + return hg +} + +// TruncateHistogram truncates the histogram to `numBkt` buckets. +func (hg *Histogram) TruncateHistogram(numBkt int) *Histogram { + hist := hg.Copy() + hist.Buckets = hist.Buckets[:numBkt] + hist.Bounds.TruncateTo(numBkt * 2) + return hist +} + // ErrorRate is the error rate of estimate row count by bucket and cm sketch. 
type ErrorRate struct { ErrorTotal float64 @@ -629,6 +638,8 @@ type Column struct { Info *model.ColumnInfo IsHandle bool ErrorRate + Flag int64 + LastAnalyzePos types.Datum } func (c *Column) String() string { @@ -730,8 +741,10 @@ type Index struct { Histogram *CMSketch ErrorRate - StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility - Info *model.IndexInfo + StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility + Info *model.IndexInfo + Flag int64 + LastAnalyzePos types.Datum } func (idx *Index) String() string { @@ -990,28 +1003,6 @@ func (idx *Index) outOfRange(val types.Datum) bool { return !withInLowBoundOrPrefixMatch || !withInHighBound } -// RemoveUpperBound removes the upper bound the index stats. -// It is used when merge stats for incremental analyze. -func (idx *Index) RemoveUpperBound(sc *stmtctx.StatementContext, values []types.Datum) (*Histogram, *CMSketch, error) { - hist, cms := idx.Histogram.Copy(), idx.CMSketch.Copy() - hist.Buckets[hist.Len()-1].Count -= hist.Buckets[hist.Len()-1].Repeat - hist.Buckets[hist.Len()-1].Repeat = 0 - if cms == nil { - return hist, nil, nil - } - var data []byte - var err error - for _, val := range values { - data, err = codec.EncodeKey(sc, data, val) - if err != nil { - return nil, nil, err - } - h1, h2 := murmur3.Sum128(data) - cms.setValue(h1, h2, 0) - } - return hist, cms, nil -} - // matchPrefix checks whether ad is the prefix of value func matchPrefix(row chunk.Row, colIdx int, ad *types.Datum) bool { switch ad.Kind() {