diff --git a/cmd/explaintest/r/imdbload.result b/cmd/explaintest/r/imdbload.result index c3ee5badab7e6..00543e6a1640f 100644 --- a/cmd/explaintest/r/imdbload.result +++ b/cmd/explaintest/r/imdbload.result @@ -286,7 +286,7 @@ IndexLookUp_7 1005030.94 root └─TableRowIDScan_6(Probe) 1005030.94 cop[tikv] table:char_name keep order:false trace plan target = 'estimation' select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436')); CE_trace -[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}] +[{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'I'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Point","expr":"((imdb_index = 'L'))","row_count":0},{"table_name":"char_name","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":4314864},{"table_name":"char_name","type":"Column Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Index Stats-Range","expr":"((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'))","row_count":0},{"table_name":"char_name","type":"Index Stats-Range","expr":"((surname_pcode < 'E436'))","row_count":1005030},{"table_name":"char_name","type":"Table Stats-Expression-CNF","expr":"`or`(`and`(`eq`(imdbload.char_name.imdb_index, 'I'), `lt`(imdbload.char_name.surname_pcode, 'E436')), `and`(`eq`(imdbload.char_name.imdb_index, 'L'), `lt`(imdbload.char_name.surname_pcode, 'E436')))","row_count":804024}] explain select * from char_name where ((imdb_index = 'V') and (surname_pcode < 'L3416')); id estRows task access object operator info @@ -356,7 +356,7 @@ IndexLookUp_11 901.00 root └─TableRowIDScan_9 901.00 cop[tikv] table:keyword keep order:false trace plan target = 'estimation' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers')); CE_trace -[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}] +[{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}] explain select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida'); id estRows task access object operator info diff --git a/planner/core/find_best_task.go b/planner/core/find_best_task.go index c73ce9f3c086d..2389f56da5337 100644 --- a/planner/core/find_best_task.go +++ b/planner/core/find_best_task.go @@ -1711,9 +1711,10 @@ func (ds *DataSource) crossEstimateRowCount(path *util.AccessPath, conds []expre if len(ranges) == 0 || len(accessConds) == 0 || err != nil { return 0, err == nil, corr } - idxID, idxExists := ds.stats.HistColl.ColID2IdxID[colID] - if !idxExists { - idxID = -1 + idxID := int64(-1) + idxIDs, idxExists := ds.stats.HistColl.ColID2IdxIDs[colID] + if idxExists && len(idxIDs) > 0 { + idxID = idxIDs[0] } rangeCounts, ok := getColumnRangeCounts(ds.ctx, colID, ranges, ds.tableStats.HistColl, idxID) if !ok { diff --git a/statistics/histogram.go b/statistics/histogram.go index 2133ccad3b53b..8c662b6f04061 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -997,7 +997,7 @@ func (coll *HistColl) NewHistCollBySelectivity(sctx sessionctx.Context, statsNod Columns: make(map[int64]*Column), Indices: make(map[int64]*Index), Idx2ColumnIDs: coll.Idx2ColumnIDs, - ColID2IdxID: coll.ColID2IdxID, + ColID2IdxIDs: coll.ColID2IdxIDs, Count: coll.Count, } for _, node := range statsNodes { diff --git a/statistics/index.go b/statistics/index.go index 6b8a88501c30e..71d2aa839bd61 100644 --- a/statistics/index.go +++ b/statistics/index.go @@ -346,14 +346,30 @@ func (idx *Index) expBackoffEstimation(sctx sessionctx.Context, coll *HistColl, } colID := colsIDs[i] var ( - count float64 - err error + count float64 + err error + foundStats bool ) - if anotherIdxID, ok := coll.ColID2IdxID[colID]; ok && anotherIdxID != idx.Histogram.ID { - count, err = coll.GetRowCountByIndexRanges(sctx, anotherIdxID, tmpRan) - } else if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) { + if col, ok := coll.Columns[colID]; ok && !col.IsInvalid(sctx, coll.Pseudo) { + foundStats = true count, err = coll.GetRowCountByColumnRanges(sctx, colID, tmpRan) - } else { + } + if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && !foundStats && len(indexRange.LowVal) > 1 { + // Note the `len(indexRange.LowVal) > 1` condition here, it means we only recursively call + // `GetRowCountByIndexRanges()` when the input `indexRange` is a multi-column range. This + // check avoids infinite recursion. + for _, idxID := range idxIDs { + if idxID == idx.Histogram.ID { + continue + } + foundStats = true + count, err = coll.GetRowCountByIndexRanges(sctx, idxID, tmpRan) + if err == nil { + break + } + } + } + if !foundStats { continue } if err != nil { diff --git a/statistics/table.go b/statistics/table.go index 81cb4e9bf284f..2e66e39ab8152 100644 --- a/statistics/table.go +++ b/statistics/table.go @@ -101,10 +101,10 @@ type HistColl struct { Indices map[int64]*Index // Idx2ColumnIDs maps the index id to its column ids. It's used to calculate the selectivity in planner. Idx2ColumnIDs map[int64][]int64 - // ColID2IdxID maps the column id to index id whose first column is it. It's used to calculate the selectivity in planner. - ColID2IdxID map[int64]int64 - Count int64 - ModifyCount int64 // Total modify count in a table. + // ColID2IdxIDs maps the column id to a list index ids whose first column is it. It's used to calculate the selectivity in planner. + ColID2IdxIDs map[int64][]int64 + Count int64 + ModifyCount int64 // Total modify count in a table. // HavePhysicalID is true means this HistColl is from single table and have its ID's information. // The physical id is used when try to load column stats from storage. @@ -846,7 +846,7 @@ func (coll *HistColl) ID2UniqueID(columns []*expression.Column) *HistColl { return newColl } -// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxID and IdxID2ColIDs is built from the given parameter. +// GenerateHistCollFromColumnInfo generates a new HistColl whose ColID2IdxIDs and IdxID2ColIDs is built from the given parameter. func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, columns []*expression.Column) *HistColl { newColHistMap := make(map[int64]*Column) colInfoID2UniqueID := make(map[int64]int64, len(columns)) @@ -869,7 +869,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, } newIdxHistMap := make(map[int64]*Index) idx2Columns := make(map[int64][]int64) - colID2IdxID := make(map[int64]int64) + colID2IdxIDs := make(map[int64][]int64) for _, idxHist := range coll.Indices { ids := make([]int64, 0, len(idxHist.Info.Columns)) for _, idxCol := range idxHist.Info.Columns { @@ -883,10 +883,13 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, if len(ids) == 0 { continue } - colID2IdxID[ids[0]] = idxHist.ID + colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID) newIdxHistMap[idxHist.ID] = idxHist idx2Columns[idxHist.ID] = ids } + for _, idxIDs := range colID2IdxIDs { + slices.Sort(idxIDs) + } newColl := &HistColl{ PhysicalID: coll.PhysicalID, HavePhysicalID: coll.HavePhysicalID, @@ -895,7 +898,7 @@ func (coll *HistColl) GenerateHistCollFromColumnInfo(infos []*model.ColumnInfo, ModifyCount: coll.ModifyCount, Columns: newColHistMap, Indices: newIdxHistMap, - ColID2IdxID: colID2IdxID, + ColID2IdxIDs: colID2IdxIDs, Idx2ColumnIDs: idx2Columns, } return newColl @@ -1084,8 +1087,9 @@ func (coll *HistColl) getIndexRowCount(sctx sessionctx.Context, idxID int64, ind colID = colIDs[rangePosition] } // prefer index stats over column stats - if idx, ok := coll.ColID2IdxID[colID]; ok { - count, err = coll.GetRowCountByIndexRanges(sctx, idx, []*ranger.Range{&rang}) + if idxIDs, ok := coll.ColID2IdxIDs[colID]; ok && len(idxIDs) > 0 { + idxID := idxIDs[0] + count, err = coll.GetRowCountByIndexRanges(sctx, idxID, []*ranger.Range{&rang}) } else { count, err = coll.GetRowCountByColumnRanges(sctx, colID, []*ranger.Range{&rang}) }