diff --git a/statistics/BUILD.bazel b/statistics/BUILD.bazel
index 6a1b3d5a54921..e6992020197c3 100644
--- a/statistics/BUILD.bazel
+++ b/statistics/BUILD.bazel
@@ -112,6 +112,7 @@ go_test(
         "@com_github_pingcap_failpoint//:failpoint",
         "@com_github_pingcap_log//:log",
         "@com_github_stretchr_testify//require",
+        "@org_golang_x_exp//slices",
         "@org_uber_go_goleak//:goleak",
         "@org_uber_go_zap//:zap",
     ],
diff --git a/statistics/index.go b/statistics/index.go
index 78246942ffb99..d201aa8fdd14f 100644
--- a/statistics/index.go
+++ b/statistics/index.go
@@ -222,6 +222,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
 	totalCount := float64(0)
 	isSingleCol := len(idx.Info.Columns) == 1
 	for _, indexRange := range indexRanges {
+		var count float64
 		lb, err := codec.EncodeKey(sc, nil, indexRange.LowVal...)
 		if err != nil {
 			return 0, err
 		}
@@ -242,7 +243,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
 					totalCount++
 					continue
 				}
-				count := idx.equalRowCount(lb, realtimeRowCount)
+				count = idx.equalRowCount(lb, realtimeRowCount)
 				// If the current table row count has changed, we should scale the row count accordingly.
 				count *= idx.GetIncreaseFactor(realtimeRowCount)
 				totalCount += count
@@ -262,7 +263,7 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
 		r := types.NewBytesDatum(rb)
 		lowIsNull := bytes.Equal(lb, nullKeyBytes)
 		if isSingleCol && lowIsNull {
-			totalCount += float64(idx.Histogram.NullCount)
+			count += float64(idx.Histogram.NullCount)
 		}
 		expBackoffSuccess := false
 		// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
@@ -301,16 +302,17 @@ func (idx *Index) GetRowCount(sctx sessionctx.Context, coll *HistColl, indexRang
 			}
 		}
 		if !expBackoffSuccess {
-			totalCount += idx.BetweenRowCount(l, r)
+			count += idx.BetweenRowCount(l, r)
 		}
 
 		// If the current table row count has changed, we should scale the row count accordingly.
-		totalCount *= idx.GetIncreaseFactor(realtimeRowCount)
+		count *= idx.GetIncreaseFactor(realtimeRowCount)
 
 		// handling the out-of-range part
 		if (idx.outOfRange(l) && !(isSingleCol && lowIsNull)) || idx.outOfRange(r) {
 			totalCount += idx.Histogram.outOfRangeRowCount(&l, &r, modifyCount)
 		}
+		totalCount += count
 	}
 	totalCount = mathutil.Clamp(totalCount, 0, float64(realtimeRowCount))
 	return totalCount, nil
diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go
index 08ac16612dd61..05a7413fa3d09 100644
--- a/statistics/selectivity_test.go
+++ b/statistics/selectivity_test.go
@@ -44,6 +44,7 @@ import (
 	"github.com/pingcap/tidb/util/mock"
 	"github.com/pingcap/tidb/util/ranger"
 	"github.com/stretchr/testify/require"
+	"golang.org/x/exp/slices"
 )
 
 func TestCollationColumnEstimate(t *testing.T) {
@@ -891,7 +892,7 @@ func prepareSelectivity(testKit *testkit.TestKit, dom *domain.Domain) (*statisti
 	return statsTbl, nil
 }
 
-func getRange(start, end int64) []*ranger.Range {
+func getRange(start, end int64) ranger.Ranges {
 	ran := &ranger.Range{
 		LowVal:    []types.Datum{types.NewIntDatum(start)},
 		HighVal:   []types.Datum{types.NewIntDatum(end)},
@@ -900,6 +901,21 @@
 	return []*ranger.Range{ran}
 }
 
+func getRanges(start, end []int64) (res ranger.Ranges) {
+	if len(start) != len(end) {
+		return nil
+	}
+	for i := range start {
+		ran := &ranger.Range{
+			LowVal:    []types.Datum{types.NewIntDatum(start[i])},
+			HighVal:   []types.Datum{types.NewIntDatum(end[i])},
+			Collators: collate.GetBinaryCollatorSlice(1),
+		}
+		res = append(res, ran)
+	}
+	return
+}
+
 func TestSelectivityGreedyAlgo(t *testing.T) {
 	nodes := make([]*statistics.StatsNode, 3)
 	nodes[0] = statistics.MockStatsNode(1, 3, 2)
@@ -1075,3 +1091,69 @@ func TestGlobalStatsOutOfRangeEstimationAfterDelete(t *testing.T) {
 		testKit.MustQuery(input[i]).Check(testkit.Rows(output[i].Result...))
 	}
 }
+
+func generateMapsForMockStatsTbl(statsTbl *statistics.Table) {
+	idx2Columns := make(map[int64][]int64)
+	colID2IdxIDs := make(map[int64][]int64)
+	for _, idxHist := range statsTbl.Indices {
+		ids := make([]int64, 0, len(idxHist.Info.Columns))
+		for _, idxCol := range idxHist.Info.Columns {
+			ids = append(ids, int64(idxCol.Offset))
+		}
+		colID2IdxIDs[ids[0]] = append(colID2IdxIDs[ids[0]], idxHist.ID)
+		idx2Columns[idxHist.ID] = ids
+	}
+	for _, idxIDs := range colID2IdxIDs {
+		slices.Sort(idxIDs)
+	}
+	statsTbl.Idx2ColumnIDs = idx2Columns
+	statsTbl.ColID2IdxIDs = colID2IdxIDs
+}
+
+func TestIssue39593(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	testKit := testkit.NewTestKit(t, store)
+
+	testKit.MustExec("use test")
+	testKit.MustExec("drop table if exists t")
+	testKit.MustExec("create table t(a int, b int, index idx(a, b))")
+	is := dom.InfoSchema()
+	tb, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	require.NoError(t, err)
+	tblInfo := tb.Meta()
+
+	// mock the statistics.Table
+	statsTbl := mockStatsTable(tblInfo, 540)
+	colValues, err := generateIntDatum(1, 54)
+	require.NoError(t, err)
+	for i := 1; i <= 2; i++ {
+		statsTbl.Columns[int64(i)] = &statistics.Column{
+			Histogram:         *mockStatsHistogram(int64(i), colValues, 10, types.NewFieldType(mysql.TypeLonglong)),
+			Info:              tblInfo.Columns[i-1],
+			StatsLoadedStatus: statistics.NewStatsFullLoadStatus(),
+			StatsVer:          2,
+		}
+	}
+	idxValues, err := generateIntDatum(2, 3)
+	require.NoError(t, err)
+	tp := types.NewFieldType(mysql.TypeBlob)
+	statsTbl.Indices[1] = &statistics.Index{
+		Histogram: *mockStatsHistogram(1, idxValues, 60, tp),
+		Info:      tblInfo.Indices[0],
+		StatsVer:  2,
+	}
+	generateMapsForMockStatsTbl(statsTbl)
+
+	sctx := testKit.Session()
+	idxID := tblInfo.Indices[0].ID
+	vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
+	count, err := statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals))
+	require.NoError(t, err)
+	// estimated row count without any changes
+	require.Equal(t, float64(360), count)
+	statsTbl.Count *= 10
+	count, err = statsTbl.GetRowCountByIndexRanges(sctx, idxID, getRanges(vals, vals))
+	require.NoError(t, err)
+	// estimated row count after mock modify on the table
+	require.Equal(t, float64(3600), count)
+}
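
For readers skimming the diff: the behavioral change in Index.GetRowCount is that each range's estimate is now accumulated into a local count, scaled once by the increase factor, and only then folded into totalCount. Previously the line totalCount *= idx.GetIncreaseFactor(realtimeRowCount) ran inside the loop, so the factor was re-applied to every range processed earlier. The sketch below is a minimal standalone illustration of that accumulation order, not TiDB's actual API; estimateRowCount, rangeEst, outOfRangeEst, and increaseFactor are hypothetical names, and only the 360/3600 expectations are taken from TestIssue39593 above.

```go
package main

import "fmt"

// estimateRowCount mirrors the fixed accumulation order: scale each range's
// estimate individually, add the un-scaled out-of-range part, then sum.
// (In the buggy version, the scaling multiplied the running total inside the
// loop, compounding the increase factor across ranges.)
func estimateRowCount(rangeEst, outOfRangeEst []float64, increaseFactor float64) float64 {
	total := 0.0
	for i := range rangeEst {
		count := rangeEst[i]      // per-range histogram estimate
		count *= increaseFactor   // scale only this range's estimate
		total += outOfRangeEst[i] // out-of-range rows are added as-is
		total += count
	}
	return total
}

func main() {
	// 20 point ranges, each estimated at 18 rows; no out-of-range contribution.
	rangeEst := make([]float64, 20)
	for i := range rangeEst {
		rangeEst[i] = 18
	}
	outOfRange := make([]float64, 20)
	fmt.Println(estimateRowCount(rangeEst, outOfRange, 1))  // 360: table unchanged
	fmt.Println(estimateRowCount(rangeEst, outOfRange, 10)) // 3600: table grew 10x
}
```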