Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

statistics: fix unnecessary index fmsketch loading (#42074) #45084

Open
wants to merge 1 commit into
base: release-5.4
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions statistics/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -559,3 +559,187 @@ func hasPseudoStats(rows [][]interface{}) bool {
}
return false
}
<<<<<<< HEAD
=======

// TestNotLoadedStatsOnAllNULLCol verifies that the optimizer still relies on the stats of a column that holds
// nothing but NULLs when those stats have not been loaded, since loading them would not change the estimation.
func TestNotLoadedStatsOnAllNULLCol(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	statsHandle := dom.StatsHandle()
	prevLease := statsHandle.Lease()
	statsHandle.SetLease(1000)
	// Put the original lease back once the test is over.
	defer statsHandle.SetLease(prevLease)

	tk := testkit.NewTestKit(t, store)
	// t1 receives four NULL rows and t2 receives two; both tables are analyzed afterwards.
	for _, stmt := range []string{
		"use test",
		"drop table if exists t1",
		"drop table if exists t2",
		"create table t1(a int)",
		"create table t2(a int)",
		"insert into t1 values(null), (null), (null), (null)",
		"insert into t2 values(null), (null)",
		"analyze table t1;",
		"analyze table t2;",
	} {
		tk.MustExec(stmt)
	}

	// t1 left join t2.
	tk.MustQuery("explain format = 'brief' select * from t1 left join t2 on t1.a=t2.a order by t1.a, t2.a").Check(testkit.Rows(
		"Sort 4.00 root test.t1.a, test.t2.a",
		"└─HashJoin 4.00 root left outer join, equal:[eq(test.t1.a, test.t2.a)]",
		" ├─TableReader(Build) 0.00 root data:Selection",
		// With pseudo estimation (i.e. the stats on this column being ignored), the Selection row count would be 2.
		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
		" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
		" └─TableReader(Probe) 4.00 root data:TableFullScan",
		" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))

	// t2 left join t1.
	tk.MustQuery("explain format = 'brief' select * from t2 left join t1 on t1.a=t2.a order by t1.a, t2.a").Check(testkit.Rows(
		"Sort 2.00 root test.t1.a, test.t2.a",
		"└─HashJoin 2.00 root left outer join, equal:[eq(test.t2.a, test.t1.a)]",
		// Without the stats on this column, t2 would be picked as the build side because of its smaller row count.
		" ├─TableReader(Build) 0.00 root data:Selection",
		// Without the stats on this column, the Selection row count would be 4.
		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
		" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
		" └─TableReader(Probe) 2.00 root data:TableFullScan",
		" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))

	// t1 right join t2.
	tk.MustQuery("explain format = 'brief' select * from t1 right join t2 on t1.a=t2.a order by t1.a, t2.a").Check(testkit.Rows(
		"Sort 2.00 root test.t1.a, test.t2.a",
		"└─HashJoin 2.00 root right outer join, equal:[eq(test.t1.a, test.t2.a)]",
		" ├─TableReader(Build) 0.00 root data:Selection",
		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t1.a))",
		" │ └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
		" └─TableReader(Probe) 2.00 root data:TableFullScan",
		" └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))

	// t2 right join t1.
	tk.MustQuery("explain format = 'brief' select * from t2 right join t1 on t1.a=t2.a order by t1.a, t2.a").Check(testkit.Rows(
		"Sort 4.00 root test.t1.a, test.t2.a",
		"└─HashJoin 4.00 root right outer join, equal:[eq(test.t2.a, test.t1.a)]",
		" ├─TableReader(Build) 0.00 root data:Selection",
		" │ └─Selection 0.00 cop[tikv] not(isnull(test.t2.a))",
		" │ └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
		" └─TableReader(Probe) 4.00 root data:TableFullScan",
		" └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))
}

// TestCrossValidationSelectivity checks the plan estimated for a query that combines a range on the clustered
// primary key (a, b) with a filter on the non-key column c, using stats collected under analyze version 1.
func TestCrossValidationSelectivity(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	statsHandle := dom.StatsHandle()
	tk := testkit.NewTestKit(t, store)

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("set @@tidb_analyze_version = 1")
	tk.MustExec("create table t (a int, b int, c int, primary key (a, b) clustered)")

	// Consume the DDL event produced by the CREATE TABLE above and hand it to the stats handle.
	ddlEvent := <-statsHandle.DDLEventCh()
	err := statsHandle.HandleDDLEvent(ddlEvent)
	require.NoError(t, err)

	tk.MustExec("insert into t values (1,2,3), (1,4,5)")
	// Dump the pending stats delta to KV before analyzing.
	err = statsHandle.DumpStatsDeltaToKV(handle.DumpAll)
	require.NoError(t, err)
	tk.MustExec("analyze table t")

	plan := tk.MustQuery("explain format = 'brief' select * from t where a = 1 and b > 0 and b < 1000 and c > 1000")
	plan.Check(testkit.Rows(
		"TableReader 0.00 root data:Selection",
		"└─Selection 0.00 cop[tikv] gt(test.t.c, 1000)",
		" └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false"))
}

// TestShowHistogramsLoadStatus checks the load-status field (row[10]) reported by `show stats_histograms`
// after analyzing a table while the stats lease is set to one second: the primary-key column `a` and the
// index `idx` must be reported as "allLoaded", and every other histogram as "allEvicted".
func TestShowHistogramsLoadStatus(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	origLease := h.Lease()
	h.SetLease(time.Second)
	// Restore the original lease once the test is over.
	defer func() { h.SetLease(origLease) }()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int primary key, b int, c int, index idx(b, c))")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
	tk.MustExec("insert into t values (1,2,3), (4,5,6)")
	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
	tk.MustExec("analyze table t")
	require.NoError(t, h.Update(dom.InfoSchema()))
	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Rows()
	// Guard against a vacuous pass: with no rows the loop below would assert nothing.
	require.NotEmpty(t, rows)
	for _, row := range rows {
		// row[3] is the column/index name.
		expected := "allEvicted"
		if row[3] == "a" || row[3] == "idx" {
			expected = "allLoaded"
		}
		require.Equal(t, expected, row[10].(string), "unexpected load status for %v", row[3])
	}
}

// TestSingleColumnIndexNDV checks that, after analyze, every single-column index reports the same
// distinct count and null count in `show stats_histograms` as the column it is built on.
func TestSingleColumnIndexNDV(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	h := dom.StatsHandle()
	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, c varchar(20), d varchar(20), index idx_a(a), index idx_b(b), index idx_c(c), index idx_d(d))")
	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
	tk.MustExec("insert into t values (1, 1, 'xxx', 'zzz'), (2, 2, 'yyy', 'zzz'), (1, 3, null, 'zzz')")
	// Double the table five times: 3 rows become 96, so the single NULL in c becomes 32 NULLs.
	for i := 0; i < 5; i++ {
		tk.MustExec("insert into t select * from t")
	}
	tk.MustExec("analyze table t")
	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Sort().Rows()
	// Each entry is {column_name, distinct_count, null_count}, in the sorted order of the result set.
	expectedResults := [][]string{
		{"a", "2", "0"}, {"b", "3", "0"}, {"c", "2", "32"}, {"d", "1", "0"},
		{"idx_a", "2", "0"}, {"idx_b", "3", "0"}, {"idx_c", "2", "32"}, {"idx_d", "1", "0"},
	}
	// Without this check, a short result set would pass silently and a long one would panic on
	// expectedResults[i] instead of failing with a readable message.
	require.Len(t, rows, len(expectedResults))
	for i, row := range rows {
		require.Equal(t, expectedResults[i][0], row[3]) // column_name
		require.Equal(t, expectedResults[i][1], row[6]) // distinct_count
		require.Equal(t, expectedResults[i][2], row[7]) // null_count
	}
}

// TestColumnStatsLazyLoad asserts that, with column stats lazy load enabled, the stats of both columns of
// the table are reported as all-evicted after the first analyze and remain so after a second analyze.
func TestColumnStatsLazyLoad(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()
	savedLease := statsHandle.Lease()
	defer statsHandle.SetLease(savedLease)
	// Set `Lease` to `Millisecond` to enable column stats lazy load.
	statsHandle.SetLease(time.Millisecond)

	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int)")
	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	tk.MustExec("analyze table t")

	infoSchema := dom.InfoSchema()
	table, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	meta := table.Meta()

	// requireBothColumnsEvicted checks the first two columns, fetching the table stats anew for each one.
	requireBothColumnsEvicted := func() {
		for _, col := range meta.Columns[:2] {
			require.True(t, statsHandle.GetTableStats(meta).Columns[col.ID].IsAllEvicted())
		}
	}

	requireBothColumnsEvicted()
	tk.MustExec("analyze table t")
	requireBothColumnsEvicted()
}

// TestUpdateNotLoadIndexFMSketch asserts that the FMSketch of index `idx` is nil in the stats of both
// partitions after analyze, and is still nil after the stats handle is cleared and updated again.
func TestUpdateNotLoadIndexFMSketch(t *testing.T) {
	store, dom := testkit.CreateMockStoreAndDomain(t)
	tk := testkit.NewTestKit(t, store)
	statsHandle := dom.StatsHandle()

	tk.MustExec("use test")
	tk.MustExec("create table t(a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (10),partition p1 values less than maxvalue)")
	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
	ddlEvent := <-statsHandle.DDLEventCh()
	require.NoError(t, statsHandle.HandleDDLEvent(ddlEvent))
	tk.MustExec("analyze table t")

	infoSchema := dom.InfoSchema()
	table, err := infoSchema.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
	require.NoError(t, err)
	meta := table.Meta()
	index := meta.Indices[0]

	// requireNoIndexFMSketch inspects partitions p0 and p1 (the first two definitions), in that order.
	requireNoIndexFMSketch := func() {
		for _, partition := range meta.Partition.Definitions[:2] {
			require.Nil(t, statsHandle.GetPartitionStats(meta, partition.ID).Indices[index.ID].FMSketch)
		}
	}

	requireNoIndexFMSketch()
	// Clear the handle's state and run Update again, then repeat the assertions.
	statsHandle.Clear()
	require.NoError(t, statsHandle.Update(infoSchema))
	requireNoIndexFMSketch()
}
>>>>>>> cdab35847f8 (statistics: fix unnecessary index fmsketch loading (#42074))
Loading