-
Notifications
You must be signed in to change notification settings - Fork 5.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
planner, statistics: use the correct column ID when recording stats loading status #52208
Changes from 11 commits
c35daf5
e60c752
72dfc6a
33291ed
a44e2b2
93eb1e6
51918df
a7eab99
9808c88
a320f42
65c5878
6758bd7
688b40d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -152,7 +152,7 @@ func crossEstimateRowCount(sctx context.PlanContext, | |
return 0, err == nil, corr | ||
} | ||
idxID := int64(-1) | ||
idxIDs, idxExists := dsStatsInfo.HistColl.ColID2IdxIDs[colID] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is better to use the clearly name 'colUniqueID' instead of 'colID' to avoid the wrong used. |
||
idxIDs, idxExists := dsStatsInfo.HistColl.ColUniqueID2IdxIDs[colID] | ||
if idxExists && len(idxIDs) > 0 { | ||
idxID = idxIDs[0] | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,7 +45,11 @@ func GetRowCountByColumnRanges(sctx context.PlanContext, coll *statistics.HistCo | |
} | ||
sc := sctx.GetSessionVars().StmtCtx | ||
c, ok := coll.Columns[colID] | ||
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) | ||
colInfoID := colID | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
if len(coll.UniqueID2colInfoID) > 0 { | ||
colInfoID = coll.UniqueID2colInfoID[colID] | ||
} | ||
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should distinguish colId and colUniqueId in explain such as "colID: xxx" , "colUniqueID: XXX" There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We want the column name in the EXPLAIN result, which should be fetched using the column ID from the metadata instead of the |
||
if c != nil && c.Info != nil { | ||
name = c.Info.Name.O | ||
} | ||
|
@@ -83,7 +87,11 @@ func GetRowCountByIntColumnRanges(sctx context.PlanContext, coll *statistics.His | |
} | ||
sc := sctx.GetSessionVars().StmtCtx | ||
c, ok := coll.Columns[colID] | ||
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colID) | ||
colInfoID := colID | ||
if len(coll.UniqueID2colInfoID) > 0 { | ||
colInfoID = coll.UniqueID2colInfoID[colID] | ||
} | ||
recordUsedItemStatsStatus(sctx, c, coll.PhysicalID, colInfoID) | ||
if c != nil && c.Info != nil { | ||
name = c.Info.Name.O | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,15 +170,15 @@ func getIndexRowCountForStatsV1(sctx context.PlanContext, coll *statistics.HistC | |
} | ||
var count float64 | ||
var err error | ||
colIDs := coll.Idx2ColumnIDs[idxID] | ||
colIDs := coll.Idx2ColUniqueIDs[idxID] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Index ID is not relevant here, only Column ID has this difference. |
||
var colID int64 | ||
if rangePosition >= len(colIDs) { | ||
colID = -1 | ||
} else { | ||
colID = colIDs[rangePosition] | ||
} | ||
// prefer index stats over column stats | ||
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 { | ||
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && len(idxIDs) > 0 { | ||
idxID := idxIDs[0] | ||
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang}) | ||
} else { | ||
|
@@ -422,7 +422,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll | |
Collators: make([]collate.Collator, 1), | ||
}, | ||
} | ||
colsIDs := coll.Idx2ColumnIDs[idx.Histogram.ID] | ||
colsIDs := coll.Idx2ColUniqueIDs[idx.Histogram.ID] | ||
singleColumnEstResults := make([]float64, 0, len(indexRange.LowVal)) | ||
// The following codes uses Exponential Backoff to reduce the impact of independent assumption. It works like: | ||
// 1. Calc the selectivity of each column. | ||
|
@@ -449,7 +449,7 @@ func expBackoffEstimation(sctx context.PlanContext, idx *statistics.Index, coll | |
count, err = GetRowCountByColumnRanges(sctx, coll, colID, tmpRan) | ||
selectivity = count / float64(coll.RealtimeCount) | ||
} | ||
if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { | ||
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { | ||
// Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call | ||
// `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This | ||
// check avoids infinite recursion. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -215,17 +215,11 @@ const ( | |
|
||
// HistColl is a collection of histogram. It collects enough information for plan to calculate the selectivity. | ||
type HistColl struct { | ||
Columns map[int64]*Column | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Big thanks for refactoring this structure! |
||
Indices map[int64]*Index | ||
// Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. | ||
Idx2ColumnIDs map[int64][]int64 | ||
// ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. | ||
ColID2IdxIDs map[int64][]int64 | ||
// MVIdx2Columns maps the index id to its columns by expression.Column. | ||
// For normal index, the column id is enough, as we already have in Idx2ColumnIDs. But currently, mv index needs more | ||
// information to match the filter against the mv index columns, and we need this map to provide this information. | ||
MVIdx2Columns map[int64][]*expression.Column | ||
Comment on lines
-221
to
-227
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: It seems we still using some of these names in our testing code. Feel free to update it or ignore it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I found, that's used inside a function, I think that's OK. |
||
PhysicalID int64 | ||
// Note that when used in a query, Column use UniqueID as the key while Indices use the index ID in the | ||
// metadata. (See GenerateHistCollFromColumnInfo() for details) | ||
Columns map[int64]*Column | ||
Indices map[int64]*Index | ||
PhysicalID int64 | ||
// TODO: add AnalyzeCount here | ||
RealtimeCount int64 // RealtimeCount is the current table row count, maintained by applying stats delta based on AnalyzeCount. | ||
ModifyCount int64 // Total modify count in a table. | ||
|
@@ -234,9 +228,26 @@ type HistColl struct { | |
StatsVer int | ||
// HavePhysicalID is true means this HistColl is from single table and have its ID's information. | ||
// The physical id is used when try to load column stats from storage. | ||
HavePhysicalID bool | ||
Pseudo bool | ||
HavePhysicalID bool | ||
Pseudo bool | ||
|
||
/* | ||
Fields below are only used in a query, like for estimation, and they will be useless when stored in | ||
the stats cache. (See GenerateHistCollFromColumnInfo() for details) | ||
*/ | ||
|
||
CanNotTriggerLoad bool | ||
// Idx2ColUniqueIDs maps the index id to its column UniqueIDs. It's used to calculate the selectivity in planner. | ||
Idx2ColUniqueIDs map[int64][]int64 | ||
// ColUniqueID2IdxIDs maps the column UniqueID to a list index ids whose first column is it. | ||
// It's used to calculate the selectivity in planner. | ||
ColUniqueID2IdxIDs map[int64][]int64 | ||
// UniqueID2colInfoID maps the column UniqueID to its ID in the metadata. | ||
UniqueID2colInfoID map[int64]int64 | ||
// MVIdx2Columns maps the index id to its columns by expression.Column. | ||
// For normal index, the column id is enough, as we already have in Idx2ColUniqueIDs. But currently, mv index needs more | ||
// information to match the filter against the mv index columns, and we need this map to provide this information. | ||
MVIdx2Columns map[int64][]*expression.Column | ||
} | ||
|
||
// TableMemoryUsage records tbl memory usage | ||
|
@@ -800,13 +811,15 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { | |
return newColl | ||
} | ||
|
||
// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and IdxID2ColIDs is built from the given parameter. | ||
// GenerateHistCollFromColumnInfo generates a new HistColl whose ColUniqueID2IdxIDs and Idx2ColUniqueIDs is built from the given parameter. | ||
func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, columns []*expression.Column) *HistColl { | ||
newColHistMap := make(map[int64]*Column) | ||
colInfoID2UniqueID := make(map[int64]int64, len(columns)) | ||
uniqueID2colInfoID := make(map[int64]int64, len(columns)) | ||
idxID2idxInfo := make(map[int64]*model.IndexInfo) | ||
for _, col := range columns { | ||
colInfoID2UniqueID[col.ID] = col.UniqueID | ||
uniqueID2colInfoID[col.UniqueID] = col.ID | ||
} | ||
for id, colHist := range coll.Columns { | ||
uniqueID, ok := colInfoID2UniqueID[id] | ||
|
@@ -853,16 +866,17 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(tblInfo *model.TableInfo, c | |
slices.Sort(idxIDs) | ||
} | ||
newColl := &HistColl{ | ||
PhysicalID: coll.PhysicalID, | ||
HavePhysicalID: coll.HavePhysicalID, | ||
Pseudo: coll.Pseudo, | ||
RealtimeCount: coll.RealtimeCount, | ||
ModifyCount: coll.ModifyCount, | ||
Columns: newColHistMap, | ||
Indices: newIdxHistMap, | ||
ColID2IdxIDs: colID2IdxIDs, | ||
Idx2ColumnIDs: idx2Columns, | ||
MVIdx2Columns: mvIdx2Columns, | ||
PhysicalID: coll.PhysicalID, | ||
HavePhysicalID: coll.HavePhysicalID, | ||
Pseudo: coll.Pseudo, | ||
RealtimeCount: coll.RealtimeCount, | ||
ModifyCount: coll.ModifyCount, | ||
Columns: newColHistMap, | ||
Indices: newIdxHistMap, | ||
ColUniqueID2IdxIDs: colID2IdxIDs, | ||
Idx2ColUniqueIDs: idx2Columns, | ||
UniqueID2colInfoID: uniqueID2colInfoID, | ||
MVIdx2Columns: mvIdx2Columns, | ||
} | ||
return newColl | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I always want to do this, but I can never convince myself. 😃
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
😂 I think performance benefits and memory efficiency from this rule are small. So I think it's totally OK to disable it when it causes problems or inconvenience.