Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: clean up dropped predicate columns stats usage #53680

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/statistics/handle/usage/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,12 @@ go_test(
timeout = "short",
srcs = [
"index_usage_integration_test.go",
"predicate_column_test.go",
"session_stats_collect_test.go",
],
embed = [":usage"],
flaky = True,
shard_count = 3,
deps = [
"//pkg/infoschema",
"//pkg/parser/model",
Expand Down
36 changes: 35 additions & 1 deletion pkg/statistics/handle/usage/predicate_column.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ package usage

import (
"encoding/json"
"fmt"
"time"

"github.com/pingcap/errors"
"github.com/pingcap/tidb/pkg/infoschema"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
"github.com/pingcap/tidb/pkg/sessionctx"
Expand Down Expand Up @@ -65,7 +67,7 @@ func (u *statsUsageImpl) GetPredicateColumns(tableID int64) (columnIDs []int64,
err = utilstats.CallWithSCtx(u.statsHandle.SPool(), func(sctx sessionctx.Context) error {
columnIDs, err = GetPredicateColumns(sctx, tableID)
return err
})
}, utilstats.FlagWrapTxn)
return
}

Expand Down Expand Up @@ -123,6 +125,11 @@ func LoadColumnStatsUsage(sctx sessionctx.Context, loc *time.Location) (map[mode

// GetPredicateColumns returns the IDs of predicate columns, i.e. the columns whose stats are used (needed) when generating query plans.
func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error) {
// Each time we retrieve the predicate columns, we also attempt to remove any column stats usage information whose column is dropped.
err := cleanupDroppedColumnStatsUsage(sctx, tableID)
if err != nil {
return nil, errors.Trace(err)
}
// This time is the time when `set global tidb_enable_column_tracking = 0`.
disableTime, err := getDisableColumnTrackingTime(sctx)
if err != nil {
Expand Down Expand Up @@ -159,6 +166,33 @@ func GetPredicateColumns(sctx sessionctx.Context, tableID int64) ([]int64, error
return columnIDs, nil
}

// cleanupDroppedColumnStatsUsage deletes the column stats usage information whose column is dropped.
func cleanupDroppedColumnStatsUsage(sctx sessionctx.Context, tableID int64) error {
is := sctx.GetDomainInfoSchema().(infoschema.InfoSchema)
table, ok := is.TableByID(tableID)
if !ok {
// Usually, it should not happen.
// But if it happens, we can safely do nothing.
return nil
}
allColumns := table.Meta().Columns
// Due to SQL limitations, column IDs must be converted to strings for proper escaping in the query :(
columnIDs := make([]string, 0, len(allColumns))
for _, col := range allColumns {
columnIDs = append(columnIDs, fmt.Sprintf("%d", col.ID))
}

// Delete the column stats usage information whose column is dropped.
_, _, err := utilstats.ExecRows(
AilinKid marked this conversation as resolved.
Show resolved Hide resolved
sctx,
"DELETE FROM mysql.column_stats_usage WHERE table_id = %? AND column_id NOT IN (%?)",
tableID,
columnIDs,
)

return err
}

// getDisableColumnTrackingTime reads the value of tidb_disable_column_tracking_time from mysql.tidb if it exists.
// UTC time format is used to store the time.
func getDisableColumnTrackingTime(sctx sessionctx.Context) (*time.Time, error) {
Expand Down
56 changes: 56 additions & 0 deletions pkg/statistics/handle/usage/predicate_column_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package usage_test

import (
"testing"

"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/testkit"
"github.com/stretchr/testify/require"
)

// TestCleanupPredicateColumns checks that, after a column is dropped,
// GetPredicateColumns reports only the remaining predicate column of the table.
func TestCleanupPredicateColumns(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)

	// Prepare the table and its data, then turn column tracking on.
	for _, stmt := range []string{
		"use test",
		"create table t (a int, b int)",
		"insert into t values (1, 1), (2, 2), (3, 3)",
		"set global tidb_enable_column_tracking = 1",
	} {
		tk.MustExec(stmt)
	}

	// Filter on each column once; both queries return the same two rows.
	for _, query := range []string{
		"select * from t where a > 1",
		"select * from t where b > 1",
	} {
		tk.MustQuery(query).Check(testkit.Rows("2 2", "3 3"))
	}

	// Dump the collected column usage and expect two rows in the usage table.
	statsHandle := dom.StatsHandle()
	require.NoError(t, statsHandle.DumpColStatsUsageToKV())
	require.Len(t, tk.MustQuery("select * from mysql.column_stats_usage").Rows(), 2)

	// Drop column b, then look the table up again to obtain its ID.
	tk.MustExec("alter table t drop column b")
	tableInfo, err := dom.InfoSchema().TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)

	// Only one predicate column should be reported now.
	predicateColumns, err := statsHandle.GetPredicateColumns(tableInfo.Meta().ID)
	require.NoError(t, err)
	require.Len(t, predicateColumns, 1)
}