forked from pingcap/tidb
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is an automated cherry-pick of pingcap#57723
Signed-off-by: ti-chi-bot <[email protected]>
- Loading branch information
1 parent
5f4110d
commit 2485a65
Showing
6 changed files
with
3,505 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") | ||
|
||
go_library( | ||
name = "statistics", | ||
srcs = [ | ||
"analyze.go", | ||
"analyze_jobs.go", | ||
"builder.go", | ||
"builder_ext_stats.go", | ||
"cmsketch.go", | ||
"cmsketch_util.go", | ||
"column.go", | ||
"debugtrace.go", | ||
"estimate.go", | ||
"fmsketch.go", | ||
"histogram.go", | ||
"index.go", | ||
"row_sampler.go", | ||
"sample.go", | ||
"scalar.go", | ||
"table.go", | ||
], | ||
importpath = "github.com/pingcap/tidb/pkg/statistics", | ||
visibility = ["//visibility:public"], | ||
deps = [ | ||
"//pkg/expression", | ||
"//pkg/kv", | ||
"//pkg/meta/model", | ||
"//pkg/parser/ast", | ||
"//pkg/parser/charset", | ||
"//pkg/parser/mysql", | ||
"//pkg/parser/terror", | ||
"//pkg/planner/core/resolve", | ||
"//pkg/planner/planctx", | ||
"//pkg/planner/util/debugtrace", | ||
"//pkg/sessionctx", | ||
"//pkg/sessionctx/stmtctx", | ||
"//pkg/sessionctx/variable", | ||
"//pkg/statistics/asyncload", | ||
"//pkg/statistics/handle/logutil", | ||
"//pkg/tablecodec", | ||
"//pkg/types", | ||
"//pkg/util/chunk", | ||
"//pkg/util/codec", | ||
"//pkg/util/collate", | ||
"//pkg/util/context", | ||
"//pkg/util/dbterror", | ||
"//pkg/util/fastrand", | ||
"//pkg/util/hack", | ||
"//pkg/util/intest", | ||
"//pkg/util/logutil", | ||
"//pkg/util/memory", | ||
"//pkg/util/ranger", | ||
"//pkg/util/sqlexec", | ||
"@com_github_dolthub_swiss//:swiss", | ||
"@com_github_pingcap_errors//:errors", | ||
"@com_github_pingcap_failpoint//:failpoint", | ||
"@com_github_pingcap_tipb//go-tipb", | ||
"@com_github_twmb_murmur3//:murmur3", | ||
"@org_golang_x_exp//maps", | ||
"@org_uber_go_atomic//:atomic", | ||
"@org_uber_go_zap//:zap", | ||
], | ||
) | ||
|
||
go_test( | ||
name = "statistics_test", | ||
timeout = "short", | ||
srcs = [ | ||
"bench_daily_test.go", | ||
"builder_test.go", | ||
"cmsketch_test.go", | ||
"fmsketch_test.go", | ||
"histogram_bench_test.go", | ||
"histogram_test.go", | ||
"integration_test.go", | ||
"main_test.go", | ||
"sample_test.go", | ||
"scalar_test.go", | ||
"statistics_test.go", | ||
], | ||
data = glob(["testdata/**"]), | ||
embed = [":statistics"], | ||
flaky = True, | ||
shard_count = 38, | ||
deps = [ | ||
"//pkg/config", | ||
"//pkg/meta/model", | ||
"//pkg/parser/model", | ||
"//pkg/parser/mysql", | ||
"//pkg/planner/core/resolve", | ||
"//pkg/sessionctx", | ||
"//pkg/sessionctx/stmtctx", | ||
"//pkg/testkit", | ||
"//pkg/testkit/analyzehelper", | ||
"//pkg/testkit/testdata", | ||
"//pkg/testkit/testmain", | ||
"//pkg/testkit/testsetup", | ||
"//pkg/types", | ||
"//pkg/util/benchdaily", | ||
"//pkg/util/chunk", | ||
"//pkg/util/codec", | ||
"//pkg/util/collate", | ||
"//pkg/util/memory", | ||
"//pkg/util/mock", | ||
"//pkg/util/ranger", | ||
"//pkg/util/sqlexec", | ||
"@com_github_pingcap_errors//:errors", | ||
"@com_github_pingcap_failpoint//:failpoint", | ||
"@com_github_stretchr_testify//require", | ||
"@org_uber_go_goleak//:goleak", | ||
], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,272 @@ | ||
// Copyright 2022 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package statistics | ||
|
||
import ( | ||
"github.com/pingcap/tidb/pkg/meta/model" | ||
"github.com/pingcap/tidb/pkg/parser/mysql" | ||
"github.com/pingcap/tidb/pkg/planner/planctx" | ||
"github.com/pingcap/tidb/pkg/planner/util/debugtrace" | ||
"github.com/pingcap/tidb/pkg/statistics/asyncload" | ||
"github.com/pingcap/tidb/pkg/types" | ||
"github.com/pingcap/tidb/pkg/util/chunk" | ||
) | ||
|
||
// Column represents a column histogram. | ||
type Column struct { | ||
LastAnalyzePos types.Datum | ||
CMSketch *CMSketch | ||
TopN *TopN | ||
FMSketch *FMSketch | ||
Info *model.ColumnInfo | ||
Histogram | ||
|
||
// StatsLoadedStatus indicates the status of column statistics | ||
StatsLoadedStatus | ||
// PhysicalID is the physical table id, | ||
// or it could possibly be -1, which means "stats not available". | ||
// The -1 case could happen in a pseudo stats table, and in this case, this stats should not trigger stats loading. | ||
PhysicalID int64 | ||
Flag int64 | ||
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility | ||
|
||
IsHandle bool | ||
} | ||
|
||
// Copy copies the column. | ||
func (c *Column) Copy() *Column { | ||
if c == nil { | ||
return nil | ||
} | ||
nc := &Column{ | ||
PhysicalID: c.PhysicalID, | ||
Flag: c.Flag, | ||
StatsVer: c.StatsVer, | ||
IsHandle: c.IsHandle, | ||
} | ||
c.LastAnalyzePos.Copy(&nc.LastAnalyzePos) | ||
if c.CMSketch != nil { | ||
nc.CMSketch = c.CMSketch.Copy() | ||
} | ||
if c.TopN != nil { | ||
nc.TopN = c.TopN.Copy() | ||
} | ||
if c.FMSketch != nil { | ||
nc.FMSketch = c.FMSketch.Copy() | ||
} | ||
if c.Info != nil { | ||
nc.Info = c.Info.Clone() | ||
} | ||
nc.Histogram = *c.Histogram.Copy() | ||
nc.StatsLoadedStatus = c.StatsLoadedStatus.Copy() | ||
return nc | ||
} | ||
|
||
func (c *Column) String() string { | ||
return c.Histogram.ToString(0) | ||
} | ||
|
||
// TotalRowCount returns the total count of this column. | ||
func (c *Column) TotalRowCount() float64 { | ||
if c.StatsVer >= Version2 { | ||
return c.Histogram.TotalRowCount() + float64(c.TopN.TotalCount()) | ||
} | ||
return c.Histogram.TotalRowCount() | ||
} | ||
|
||
// NotNullCount returns the count of this column which is not null. | ||
func (c *Column) NotNullCount() float64 { | ||
if c.StatsVer >= Version2 { | ||
return c.Histogram.NotNullCount() + float64(c.TopN.TotalCount()) | ||
} | ||
return c.Histogram.NotNullCount() | ||
} | ||
|
||
// GetIncreaseFactor get the increase factor to adjust the final estimated count when the table is modified. | ||
func (c *Column) GetIncreaseFactor(realtimeRowCount int64) float64 { | ||
columnCount := c.TotalRowCount() | ||
if columnCount == 0 { | ||
// avoid dividing by 0 | ||
return 1.0 | ||
} | ||
return float64(realtimeRowCount) / columnCount | ||
} | ||
|
||
// MemoryUsage returns the total memory usage of Histogram, CMSketch, FMSketch in Column. | ||
// We ignore the size of other metadata in Column | ||
func (c *Column) MemoryUsage() CacheItemMemoryUsage { | ||
var sum int64 | ||
columnMemUsage := &ColumnMemUsage{ | ||
ColumnID: c.Info.ID, | ||
} | ||
histogramMemUsage := c.Histogram.MemoryUsage() | ||
columnMemUsage.HistogramMemUsage = histogramMemUsage | ||
sum = histogramMemUsage | ||
if c.CMSketch != nil { | ||
cmSketchMemUsage := c.CMSketch.MemoryUsage() | ||
columnMemUsage.CMSketchMemUsage = cmSketchMemUsage | ||
sum += cmSketchMemUsage | ||
} | ||
if c.TopN != nil { | ||
topnMemUsage := c.TopN.MemoryUsage() | ||
columnMemUsage.TopNMemUsage = topnMemUsage | ||
sum += topnMemUsage | ||
} | ||
if c.FMSketch != nil { | ||
fmSketchMemUsage := c.FMSketch.MemoryUsage() | ||
columnMemUsage.FMSketchMemUsage = fmSketchMemUsage | ||
sum += fmSketchMemUsage | ||
} | ||
columnMemUsage.TotalMemUsage = sum | ||
return columnMemUsage | ||
} | ||
|
||
// ColumnStatsIsInvalid checks if this column is invalid. | ||
// If this column has histogram but not loaded yet, | ||
// then we mark it as need histogram. | ||
func ColumnStatsIsInvalid(colStats *Column, sctx planctx.PlanContext, histColl *HistColl, cid int64) (res bool) { | ||
var totalCount float64 | ||
var ndv int64 | ||
var inValidForCollPseudo, essentialLoaded bool | ||
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace { | ||
debugtrace.EnterContextCommon(sctx) | ||
defer func() { | ||
debugtrace.RecordAnyValuesWithNames(sctx, | ||
"IsInvalid", res, | ||
"InValidForCollPseudo", inValidForCollPseudo, | ||
"TotalCount", totalCount, | ||
"NDV", ndv, | ||
"EssentialLoaded", essentialLoaded, | ||
) | ||
debugtrace.LeaveContextCommon(sctx) | ||
}() | ||
} | ||
if sctx != nil { | ||
stmtctx := sctx.GetSessionVars().StmtCtx | ||
if (colStats == nil || !colStats.IsStatsInitialized() || colStats.IsLoadNeeded()) && | ||
stmtctx != nil && | ||
!histColl.CanNotTriggerLoad { | ||
asyncload.AsyncLoadHistogramNeededItems.Insert(model.TableItemID{ | ||
TableID: histColl.PhysicalID, | ||
ID: cid, | ||
IsIndex: false, | ||
IsSyncLoadFailed: sctx.GetSessionVars().StmtCtx.StatsLoad.Timeout > 0, | ||
}, true) | ||
} | ||
} | ||
if histColl.Pseudo { | ||
inValidForCollPseudo = true | ||
return true | ||
} | ||
if colStats == nil { | ||
totalCount = -1 | ||
ndv = -1 | ||
essentialLoaded = false | ||
return true | ||
} | ||
// In some cases, some statistics in column would be evicted | ||
// For example: the cmsketch of the column might be evicted while the histogram and the topn are still exists | ||
// In this case, we will think this column as valid due to we can still use the rest of the statistics to do optimize. | ||
totalCount = colStats.TotalRowCount() | ||
essentialLoaded = colStats.IsEssentialStatsLoaded() | ||
ndv = colStats.Histogram.NDV | ||
return totalCount == 0 || (!essentialLoaded && ndv > 0) | ||
} | ||
|
||
// ItemID implements TableCacheItem | ||
func (c *Column) ItemID() int64 { | ||
return c.Info.ID | ||
} | ||
|
||
// DropUnnecessaryData drops the unnecessary data for the column. | ||
func (c *Column) DropUnnecessaryData() { | ||
if c.StatsVer < Version2 { | ||
c.CMSketch = nil | ||
} | ||
c.TopN = nil | ||
c.Histogram.Bounds = chunk.NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeBlob)}, 0) | ||
c.Histogram.Buckets = make([]Bucket, 0) | ||
c.Histogram.Scalars = make([]scalar, 0) | ||
c.evictedStatus = AllEvicted | ||
} | ||
|
||
// IsAllEvicted indicates whether all stats evicted | ||
func (c *Column) IsAllEvicted() bool { | ||
return c == nil || (c.statsInitialized && c.evictedStatus >= AllEvicted) | ||
} | ||
|
||
// GetEvictedStatus indicates the evicted status | ||
func (c *Column) GetEvictedStatus() int { | ||
return c.evictedStatus | ||
} | ||
|
||
// IsStatsInitialized indicates whether stats is initialized | ||
func (c *Column) IsStatsInitialized() bool { | ||
return c.statsInitialized | ||
} | ||
|
||
// GetStatsVer indicates the stats version | ||
func (c *Column) GetStatsVer() int64 { | ||
return c.StatsVer | ||
} | ||
|
||
// IsCMSExist indicates whether CMSketch exists | ||
func (c *Column) IsCMSExist() bool { | ||
return c.CMSketch != nil | ||
} | ||
|
||
// StatusToString gets the string info of StatsLoadedStatus | ||
func (s StatsLoadedStatus) StatusToString() string { | ||
if !s.statsInitialized { | ||
return "unInitialized" | ||
} | ||
switch s.evictedStatus { | ||
case AllLoaded: | ||
return "allLoaded" | ||
case AllEvicted: | ||
return "allEvicted" | ||
} | ||
return "unknown" | ||
} | ||
|
||
// IsAnalyzed indicates whether the column is analyzed. | ||
// The set of IsAnalyzed columns is a subset of the set of StatsAvailable columns. | ||
func (c *Column) IsAnalyzed() bool { | ||
return c.GetStatsVer() != Version0 | ||
} | ||
|
||
// StatsAvailable indicates whether the column stats are collected. | ||
// Note: | ||
// 1. The function merely talks about whether the stats are collected, regardless of the stats loaded status. | ||
// 2. The function is used to decide StatsLoadedStatus.statsInitialized when reading the column stats from storage. | ||
// 3. There are two cases that StatsAvailable is true: | ||
// a. IsAnalyzed is true. | ||
// b. The column is newly-added/modified and its stats are generated according to the default value. | ||
func (c *Column) StatsAvailable() bool { | ||
// Typically, when the column is analyzed, StatsVer is set to Version1/Version2, so we check IsAnalyzed(). | ||
// However, when we add/modify a column, its stats are generated according to the default value without setting | ||
// StatsVer, so we check NDV > 0 || NullCount > 0 for the case. | ||
return c.IsAnalyzed() || c.NDV > 0 || c.NullCount > 0 | ||
} | ||
|
||
// EmptyColumn creates an empty column object. It may be used for pseudo estimation or to stop loading unexisting stats. | ||
func EmptyColumn(tid int64, pkIsHandle bool, colInfo *model.ColumnInfo) *Column { | ||
return &Column{ | ||
PhysicalID: tid, | ||
Info: colInfo, | ||
Histogram: *NewHistogram(colInfo.ID, 0, 0, 0, &colInfo.FieldType, 0, 0), | ||
IsHandle: pkIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()), | ||
} | ||
} |
Oops, something went wrong.