Skip to content

Commit

Permalink
statistics: revise loaded for column (#35361)
Browse files Browse the repository at this point in the history
ref #34052
  • Loading branch information
Yisaer authored Jun 15, 2022
1 parent cd51580 commit 07c9e03
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
4 changes: 2 additions & 2 deletions statistics/handle/handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -827,11 +827,11 @@ func (h *Handle) columnStatsFromStorage(reader *statsReader, row chunk.Row, tabl
// We will not load buckets if:
// 1. Lease > 0, and:
// 2. this column is not handle, and:
// 3. the column doesn't have buckets before, and:
// 3. the column doesn't have any statistics before, and:
// 4. loadAll is false.
notNeedLoad := h.Lease() > 0 &&
!isHandle &&
(col == nil || !col.IsLoaded() && col.LastUpdateVersion < histVer) &&
(col == nil || !col.IsNecessaryLoaded() && col.LastUpdateVersion < histVer) &&
!loadAll
if notNeedLoad {
count, err := h.columnCountFromStorage(reader, table.PhysicalID, histID, statsVer)
Expand Down
23 changes: 19 additions & 4 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -1057,16 +1057,28 @@ type Column struct {
LastAnalyzePos types.Datum
StatsVer int64 // StatsVer is the version of the current stats, used to maintain compatibility

// Loaded means if the histogram, the topn and the cm sketch are loaded fully.
// Loaded means the statistics have been loaded directly from storage or JSON (including the histogram, the TopN
// and the CM sketch). In some tests only some of them are loaded, but Loaded is still set to true to keep the tests simple.
// Those three parts of a Column are loaded lazily: they are only loaded when something first tries to use them.
// Note: Currently please use Column.IsLoaded() to check if it's loaded.
Loaded bool
}

// IsLoaded is a wrap around c.Loaded.
// It exists only for safety while we are switching from `c.notNullCount() > 0` to `c.Loaded`.
func (c *Column) IsLoaded() bool {
return c.Loaded || c.notNullCount() > 0
return c.Loaded
}

// IsNecessaryLoaded reports whether enough of this column's statistics are
// available for it to be used.
//
// It returns true as soon as IsLoaded does. Otherwise it falls back to
// checking that notNullCount is positive.
// NOTE(review): the previous comment described this fallback as a check that
// the histogram or the TopN is present (at least one of them is needed for
// optimization) — confirm against notNullCount.
func (c *Column) IsNecessaryLoaded() bool {
	return c.IsLoaded() || c.notNullCount() > 0
}

func (c *Column) String() string {
Expand Down Expand Up @@ -1148,7 +1160,10 @@ func (c *Column) IsInvalid(sctx sessionctx.Context, collPseudo bool) bool {
}
}
}
return c.TotalRowCount() == 0 || (!c.IsLoaded() && c.Histogram.NDV > 0)
// In some cases, some of a column's statistics may be evicted.
// For example, the CM sketch of the column might be evicted while the histogram and the TopN still exist.
// In this case, we still treat the column as valid because the remaining statistics can be used for optimization.
return c.TotalRowCount() == 0 || (!c.IsNecessaryLoaded() && c.Histogram.NDV > 0)
}

// IsHistNeeded checks if this column needs histogram to be loaded
Expand Down

0 comments on commit 07c9e03

Please sign in to comment.