Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: do not load unnecessary index statistics #54060

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/statistics/handle/handle_hist.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,9 @@ func (*Handle) readStatsForOneItem(sctx sessionctx.Context, item model.TableItem
return nil, errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", item.TableID),
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", item.TableID),
zap.Int64("hist_id", item.ID), zap.Bool("is_index", item.IsIndex))
return nil, errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v, is_index:%v", item.TableID, item.ID, item.IsIndex))
return nil, errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, hist_id:%v, is_index:%v", item.TableID, item.ID, item.IsIndex))
}
statsVer := rows[0].GetInt64(0)
if item.IsIndex {
Expand Down
19 changes: 14 additions & 5 deletions pkg/statistics/handle/storage/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,8 @@ func loadNeededColumnHistograms(sctx sessionctx.Context, statsCache util.StatsCa
return errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", col.TableID), zap.Int64("hist_id", col.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v", col.TableID, col.ID))
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", col.TableID), zap.Int64("column_id", col.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, column_id:%v", col.TableID, col.ID))
}
statsVer := rows[0].GetInt64(0)
colHist := &statistics.Column{
Expand Down Expand Up @@ -576,7 +576,16 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache util.StatsCac
return nil
}
index, ok := tbl.Indices[idx.ID]
if !ok {
// Double check if the index is really needed to load.
Rustin170506 marked this conversation as resolved.
Show resolved Hide resolved
Rustin170506 marked this conversation as resolved.
Show resolved Hide resolved
// If we don't do this it might cause a memory leak.
// See: https://github.com/pingcap/tidb/issues/54022
if !ok || !index.IsLoadNeeded() {
if !index.IsLoadNeeded() {
logutil.BgLogger().Warn(
"Although the index stats is not required to load, an attempt is still made to load it.",
zap.Int64("table_id", idx.TableID), zap.Int64("hist_id", idx.ID),
)
}
statistics.HistogramNeededItems.Delete(idx)
return nil
}
Expand All @@ -600,8 +609,8 @@ func loadNeededIndexHistograms(sctx sessionctx.Context, statsCache util.StatsCac
return errors.Trace(err)
}
if len(rows) == 0 {
logutil.BgLogger().Error("fail to get stats version for this histogram", zap.Int64("table_id", idx.TableID), zap.Int64("hist_id", idx.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, table_id:%v, hist_id:%v", idx.TableID, idx.ID))
logutil.BgLogger().Error("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`", zap.Int64("table_id", idx.TableID), zap.Int64("index_id", idx.ID))
return errors.Trace(fmt.Errorf("fail to get stats version for this histogram, normally this wouldn't happen, please check if this column or index has a histogram record in `mysql.stats_histogram`, table_id:%v, index_id:%v", idx.TableID, idx.ID))
}
idxHist := &statistics.Index{Histogram: *hg, CMSketch: cms, TopN: topN, FMSketch: fms,
Info: index.Info, StatsVer: rows[0].GetInt64(0),
Expand Down
2 changes: 2 additions & 0 deletions tests/realtikvtest/statisticstest/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ go_test(
flaky = True,
race = "on",
deps = [
"//pkg/parser/model",
"//pkg/statistics",
"//pkg/testkit",
"//tests/realtikvtest",
"@com_github_stretchr_testify//require",
Expand Down
68 changes: 68 additions & 0 deletions tests/realtikvtest/statisticstest/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@
package statisticstest

import (
"context"
"fmt"
"testing"
"time"

"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/statistics"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/pingcap/tidb/tests/realtikvtest"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -232,3 +236,67 @@ func checkFMSketch(tk *testkit.TestKit) {
tk.MustQuery(`SHOW STATS_HISTOGRAMS WHERE TABLE_NAME='employees' and partition_name="global" and column_name="id"`).CheckAt([]int{6}, [][]any{
{"14"}})
}

func TestNoNeedIndexStatsLoading(t *testing.T) {
store, dom := realtikvtest.CreateMockStoreAndDomainAndSetup(t)
tk := testkit.NewTestKit(t, store)
tk.MustExec("use test;")
tk.MustExec("drop table if exists t;")
tk.MustExec("create table if not exists t(a int, b int, index ia(a));")
tk.MustExec("drop stats t;")
tk.MustExec("insert into t value(1,1), (2,2);")
require.Eventually(t, func() bool {
rows := tk.MustQuery("show stats_meta").Rows()
return len(rows) > 0
}, 2*time.Minute, 5*time.Millisecond)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use this to trigger it immediately.

Suggested change
require.Eventually(t, func() bool {
rows := tk.MustQuery("show stats_meta").Rows()
return len(rows) > 0
}, 2*time.Minute, 5*time.Millisecond)
h := dom.StatsHandle()
require.NoError(t, h.DumpStatsDeltaToKV(true))
require.NoError(t, h.Update(dom.InfoSchema()))

Copy link
Member Author

@Rustin170506 Rustin170506 Jun 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I forgot we can directly call an update here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

tk.MustExec("set tidb_opt_objective='determinate';")
tk.MustQuery("select * from t where a = 1 and b = 1;").Check(testkit.Rows("1 1"))
table, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
require.NoError(t, err)
checkTableIDInItems(t, table.Meta().ID)
}

func checkTableIDInItems(t *testing.T, tableID int64) {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()

ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()

done := make(chan bool)

// First, confirm that the table ID is in the items.
items := statistics.HistogramNeededItems.AllItems()
for _, item := range items {
if item.TableID == tableID {
// Then, continuously check until it no longer exists or timeout.
go func() {
for {
select {
case <-ticker.C:
items := statistics.HistogramNeededItems.AllItems()
found := false
for _, item := range items {
if item.TableID == tableID {
found = true
}
}
if !found {
done <- true
time-and-fate marked this conversation as resolved.
Show resolved Hide resolved
}
case <-ctx.Done():
return
}
}
}()
break
}
}

select {
case <-done:
t.Log("Table ID has been removed from items")
case <-ctx.Done():
t.Fatal("Timeout: Table ID was not removed from items within the time limit")
}
}