From 4d329e5e2f9350c9d927f35f168337786123a5a5 Mon Sep 17 00:00:00 2001
From: Yifan Xu <30385241+xuyifangreeneyes@users.noreply.github.com>
Date: Fri, 10 Mar 2023 17:03:13 +0800
Subject: [PATCH] This is an automated cherry-pick of #42074

Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
---
 statistics/integration_test.go      | 184 +++++++++++
 statistics/interact_with_storage.go | 484 ++++++++++++++++++++++++++++
 2 files changed, 668 insertions(+)
 create mode 100644 statistics/interact_with_storage.go

diff --git a/statistics/integration_test.go b/statistics/integration_test.go
index e2a52d6401ad3..11f1585f4e639 100644
--- a/statistics/integration_test.go
+++ b/statistics/integration_test.go
@@ -559,3 +559,187 @@ func hasPseudoStats(rows [][]interface{}) bool {
 	}
 	return false
 }
+<<<<<<< HEAD
+=======
+
+// TestNotLoadedStatsOnAllNULLCol makes sure that stats on a column that only contains NULLs can be used even when it's
+// not loaded. This is reasonable because it makes no difference whether it's loaded or not.
+func TestNotLoadedStatsOnAllNULLCol(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	h := dom.StatsHandle()
+	oriLease := h.Lease()
+	h.SetLease(1000)
+	defer func() {
+		h.SetLease(oriLease)
+	}()
+	tk := testkit.NewTestKit(t, store)
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t1")
+	tk.MustExec("drop table if exists t2")
+	tk.MustExec("create table t1(a int)")
+	tk.MustExec("create table t2(a int)")
+	tk.MustExec("insert into t1 values(null), (null), (null), (null)")
+	tk.MustExec("insert into t2 values(null), (null)")
+	tk.MustExec("analyze table t1;")
+	tk.MustExec("analyze table t2;")
+
+	res := tk.MustQuery("explain format = 'brief' select * from t1 left join t2 on t1.a=t2.a order by t1.a, t2.a")
+	res.Check(testkit.Rows(
+		"Sort 4.00 root  test.t1.a, test.t2.a",
+		"└─HashJoin 4.00 root  left outer join, equal:[eq(test.t1.a, test.t2.a)]",
+		"  ├─TableReader(Build) 0.00 root  data:Selection",
+		// If we are not using stats on this column (which means we use pseudo estimation), the row count for the Selection will become 2.
+		"  │ └─Selection 0.00 cop[tikv]  not(isnull(test.t2.a))",
+		"  │   └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
+		"  └─TableReader(Probe) 4.00 root  data:TableFullScan",
+		"    └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t2 left join t1 on t1.a=t2.a order by t1.a, t2.a")
+	res.Check(testkit.Rows(
+		"Sort 2.00 root  test.t1.a, test.t2.a",
+		"└─HashJoin 2.00 root  left outer join, equal:[eq(test.t2.a, test.t1.a)]",
+		// If we are not using stats on this column, the build side will become t2 because of smaller row count.
+		"  ├─TableReader(Build) 0.00 root  data:Selection",
+		// If we are not using stats on this column, the row count for the Selection will become 4.
+		"  │ └─Selection 0.00 cop[tikv]  not(isnull(test.t1.a))",
+		"  │   └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
+		"  └─TableReader(Probe) 2.00 root  data:TableFullScan",
+		"    └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t1 right join t2 on t1.a=t2.a order by t1.a, t2.a")
+	res.Check(testkit.Rows(
+		"Sort 2.00 root  test.t1.a, test.t2.a",
+		"└─HashJoin 2.00 root  right outer join, equal:[eq(test.t1.a, test.t2.a)]",
+		"  ├─TableReader(Build) 0.00 root  data:Selection",
+		"  │ └─Selection 0.00 cop[tikv]  not(isnull(test.t1.a))",
+		"  │   └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false",
+		"  └─TableReader(Probe) 2.00 root  data:TableFullScan",
+		"    └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false"))
+
+	res = tk.MustQuery("explain format = 'brief' select * from t2 right join t1 on t1.a=t2.a order by t1.a, t2.a")
+	res.Check(testkit.Rows(
+		"Sort 4.00 root  test.t1.a, test.t2.a",
+		"└─HashJoin 4.00 root  right outer join, equal:[eq(test.t2.a, test.t1.a)]",
+		"  ├─TableReader(Build) 0.00 root  data:Selection",
+		"  │ └─Selection 0.00 cop[tikv]  not(isnull(test.t2.a))",
+		"  │   └─TableFullScan 2.00 cop[tikv] table:t2 keep order:false",
+		"  └─TableReader(Probe) 4.00 root  data:TableFullScan",
+		"    └─TableFullScan 4.00 cop[tikv] table:t1 keep order:false"))
+}
+
+func TestCrossValidationSelectivity(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	tk.MustExec("use test")
+	tk.MustExec("drop table if exists t")
+	tk.MustExec("set @@tidb_analyze_version = 1")
+	tk.MustExec("create table t (a int, b int, c int, primary key (a, b) clustered)")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	tk.MustExec("insert into t values (1,2,3), (1,4,5)")
+	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
+	tk.MustExec("analyze table t")
+	tk.MustQuery("explain format = 'brief' select * from t where a = 1 and b > 0 and b < 1000 and c > 1000").Check(testkit.Rows(
+		"TableReader 0.00 root  data:Selection",
+		"└─Selection 0.00 cop[tikv]  gt(test.t.c, 1000)",
+		"  └─TableRangeScan 2.00 cop[tikv] table:t range:(1 0,1 1000), keep order:false"))
+}
+
+func TestShowHistogramsLoadStatus(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	origLease := h.Lease()
+	h.SetLease(time.Second)
+	defer func() { h.SetLease(origLease) }()
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int primary key, b int, c int, index idx(b, c))")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	tk.MustExec("insert into t values (1,2,3), (4,5,6)")
+	require.NoError(t, h.DumpStatsDeltaToKV(handle.DumpAll))
+	tk.MustExec("analyze table t")
+	require.NoError(t, h.Update(dom.InfoSchema()))
+	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Rows()
+	for _, row := range rows {
+		if row[3] == "a" || row[3] == "idx" {
+			require.Equal(t, "allLoaded", row[10].(string))
+		} else {
+			require.Equal(t, "allEvicted", row[10].(string))
+		}
+	}
+}
+
+func TestSingleColumnIndexNDV(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, c varchar(20), d varchar(20), index idx_a(a), index idx_b(b), index idx_c(c), index idx_d(d))")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	tk.MustExec("insert into t values (1, 1, 'xxx', 'zzz'), (2, 2, 'yyy', 'zzz'), (1, 3, null, 'zzz')")
+	for i := 0; i < 5; i++ {
+		tk.MustExec("insert into t select * from t")
+	}
+	tk.MustExec("analyze table t")
+	rows := tk.MustQuery("show stats_histograms where db_name = 'test' and table_name = 't'").Sort().Rows()
+	expectedResults := [][]string{
+		{"a", "2", "0"}, {"b", "3", "0"}, {"c", "2", "32"}, {"d", "1", "0"},
+		{"idx_a", "2", "0"}, {"idx_b", "3", "0"}, {"idx_c", "2", "32"}, {"idx_d", "1", "0"},
+	}
+	for i, row := range rows {
+		require.Equal(t, expectedResults[i][0], row[3]) // column_name
+		require.Equal(t, expectedResults[i][1], row[6]) // distinct_count
+		require.Equal(t, expectedResults[i][2], row[7]) // null_count
+	}
+}
+
+func TestColumnStatsLazyLoad(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	originLease := h.Lease()
+	defer h.SetLease(originLease)
+	// Set `Lease` to `Millisecond` to enable column stats lazy load.
+	h.SetLease(time.Millisecond)
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int)")
+	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	tk.MustExec("analyze table t")
+	is := dom.InfoSchema()
+	tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	require.NoError(t, err)
+	tblInfo := tbl.Meta()
+	c1 := tblInfo.Columns[0]
+	c2 := tblInfo.Columns[1]
+	require.True(t, h.GetTableStats(tblInfo).Columns[c1.ID].IsAllEvicted())
+	require.True(t, h.GetTableStats(tblInfo).Columns[c2.ID].IsAllEvicted())
+	tk.MustExec("analyze table t")
+	require.True(t, h.GetTableStats(tblInfo).Columns[c1.ID].IsAllEvicted())
+	require.True(t, h.GetTableStats(tblInfo).Columns[c2.ID].IsAllEvicted())
+}
+
+func TestUpdateNotLoadIndexFMSketch(t *testing.T) {
+	store, dom := testkit.CreateMockStoreAndDomain(t)
+	tk := testkit.NewTestKit(t, store)
+	h := dom.StatsHandle()
+	tk.MustExec("use test")
+	tk.MustExec("create table t(a int, b int, index idx(a)) partition by range (a) (partition p0 values less than (10),partition p1 values less than maxvalue)")
+	tk.MustExec("insert into t values (1,2), (3,4), (5,6), (7,8)")
+	require.NoError(t, h.HandleDDLEvent(<-h.DDLEventCh()))
+	tk.MustExec("analyze table t")
+	is := dom.InfoSchema()
+	tbl, err := is.TableByName(model.NewCIStr("test"), model.NewCIStr("t"))
+	require.NoError(t, err)
+	tblInfo := tbl.Meta()
+	idxInfo := tblInfo.Indices[0]
+	p0 := tblInfo.Partition.Definitions[0]
+	p1 := tblInfo.Partition.Definitions[1]
+	require.Nil(t, h.GetPartitionStats(tblInfo, p0.ID).Indices[idxInfo.ID].FMSketch)
+	require.Nil(t, h.GetPartitionStats(tblInfo, p1.ID).Indices[idxInfo.ID].FMSketch)
+	h.Clear()
+	require.NoError(t, h.Update(is))
+	require.Nil(t, h.GetPartitionStats(tblInfo, p0.ID).Indices[idxInfo.ID].FMSketch)
+	require.Nil(t, h.GetPartitionStats(tblInfo, p1.ID).Indices[idxInfo.ID].FMSketch)
+}
+>>>>>>> cdab35847f8 (statistics: fix unnecessary index fmsketch loading (#42074))
diff --git a/statistics/interact_with_storage.go b/statistics/interact_with_storage.go
new file mode 100644
index 0000000000000..8231b90dec5d2
--- /dev/null
+++ b/statistics/interact_with_storage.go
@@ -0,0 +1,484 @@
+// Copyright 2023 PingCAP, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package statistics
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/pingcap/errors"
+	"github.com/pingcap/failpoint"
+	"github.com/pingcap/tidb/kv"
+	"github.com/pingcap/tidb/parser/ast"
+	"github.com/pingcap/tidb/parser/model"
+	"github.com/pingcap/tidb/parser/mysql"
+	"github.com/pingcap/tidb/sessionctx/stmtctx"
+	"github.com/pingcap/tidb/types"
+	"github.com/pingcap/tidb/util/chunk"
+	"github.com/pingcap/tidb/util/logutil"
+	"github.com/pingcap/tidb/util/mathutil"
+	"github.com/pingcap/tidb/util/sqlexec"
+	"go.uber.org/zap"
+)
+
+// StatsReader is used for simplifying code that needs to read statistics from system tables(mysql.stats_xxx) in different sqls
+// but requires the same transactions.
+//
+// Note that:
+// 1. Remember to call (*StatsReader).Close after reading all statistics.
+// 2. StatsReader is not thread-safe. Different goroutines cannot call (*StatsReader).Read concurrently.
+type StatsReader struct {
+	ctx      sqlexec.RestrictedSQLExecutor
+	snapshot uint64
+}
+
+// GetStatsReader returns a StatsReader.
+func GetStatsReader(snapshot uint64, exec sqlexec.RestrictedSQLExecutor) (reader *StatsReader, err error) {
+	failpoint.Inject("mockGetStatsReaderFail", func(val failpoint.Value) {
+		if val.(bool) {
+			failpoint.Return(nil, errors.New("gofail genStatsReader error"))
+		}
+	})
+	if snapshot > 0 {
+		return &StatsReader{ctx: exec, snapshot: snapshot}, nil
+	}
+	defer func() {
+		if r := recover(); r != nil {
+			err = fmt.Errorf("getStatsReader panic %v", r)
+		}
+	}()
+	ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats)
+	failpoint.Inject("mockGetStatsReaderPanic", nil)
+	_, err = exec.(sqlexec.SQLExecutor).ExecuteInternal(ctx, "begin")
+	if err != nil {
+		return nil, err
+	}
+	return &StatsReader{ctx: exec}, nil
+}
+
+// Read is a thin wrapper reading statistics from storage by sql command.
+func (sr *StatsReader) Read(sql string, args ...interface{}) (rows []chunk.Row, fields []*ast.ResultField, err error) {
+	ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats)
+	if sr.snapshot > 0 {
+		return sr.ctx.ExecRestrictedSQL(ctx, []sqlexec.OptionFuncAlias{sqlexec.ExecOptionUseSessionPool, sqlexec.ExecOptionWithSnapshot(sr.snapshot)}, sql, args...)
+	}
+	return sr.ctx.ExecRestrictedSQL(ctx, []sqlexec.OptionFuncAlias{sqlexec.ExecOptionUseCurSession}, sql, args...)
+}
+
+// IsHistory indicates whether to read history statistics.
+func (sr *StatsReader) IsHistory() bool {
+	return sr.snapshot > 0
+}
+
+// Close closes the StatsReader.
+func (sr *StatsReader) Close() error {
+	if sr.IsHistory() || sr.ctx == nil {
+		return nil
+	}
+	ctx := kv.WithInternalSourceType(context.Background(), kv.InternalTxnStats)
+	_, err := sr.ctx.(sqlexec.SQLExecutor).ExecuteInternal(ctx, "commit")
+	return err
+}
+
+// HistogramFromStorage reads histogram from storage.
+func HistogramFromStorage(reader *StatsReader, tableID int64, colID int64, tp *types.FieldType, distinct int64, isIndex int, ver uint64, nullCount int64, totColSize int64, corr float64) (_ *Histogram, err error) {
+	rows, fields, err := reader.Read("select count, repeats, lower_bound, upper_bound, ndv from mysql.stats_buckets where table_id = %? and is_index = %? and hist_id = %? order by bucket_id", tableID, isIndex, colID)
+	if err != nil {
+		return nil, errors.Trace(err)
+	}
+	bucketSize := len(rows)
+	hg := NewHistogram(colID, distinct, nullCount, ver, tp, bucketSize, totColSize)
+	hg.Correlation = corr
+	totalCount := int64(0)
+	for i := 0; i < bucketSize; i++ {
+		count := rows[i].GetInt64(0)
+		repeats := rows[i].GetInt64(1)
+		var upperBound, lowerBound types.Datum
+		if isIndex == 1 {
+			lowerBound = rows[i].GetDatum(2, &fields[2].Column.FieldType)
+			upperBound = rows[i].GetDatum(3, &fields[3].Column.FieldType)
+		} else {
+			// Invalid date values may be inserted into table under some relaxed sql mode. Those values may exist in statistics.
+			// Hence, when reading statistics, we should skip invalid date check. See #39336.
+			sc := &stmtctx.StatementContext{TimeZone: time.UTC, AllowInvalidDate: true, IgnoreZeroInDate: true}
+			d := rows[i].GetDatum(2, &fields[2].Column.FieldType)
+			// For new collation data, when storing the bounds of the histogram, we store the collate key instead of the
+			// original value.
+			// But there's additional conversion logic for new collation data, and the collate key might be longer than
+			// the FieldType.flen.
+			// If we use the original FieldType here, there might be errors like "Invalid utf8mb4 character string"
+			// or "Data too long".
+			// So we change it to TypeBlob to bypass those logics here.
+			if tp.EvalType() == types.ETString && tp.GetType() != mysql.TypeEnum && tp.GetType() != mysql.TypeSet {
+				tp = types.NewFieldType(mysql.TypeBlob)
+			}
+			lowerBound, err = d.ConvertTo(sc, tp)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+			d = rows[i].GetDatum(3, &fields[3].Column.FieldType)
+			upperBound, err = d.ConvertTo(sc, tp)
+			if err != nil {
+				return nil, errors.Trace(err)
+			}
+		}
+		totalCount += count
+		hg.AppendBucketWithNDV(&lowerBound, &upperBound, totalCount, repeats, rows[i].GetInt64(4))
+	}
+	hg.PreCalculateScalar()
+	return hg, nil
+}
+
+// CMSketchAndTopNFromStorage reads CMSketch and TopN from storage.
+func CMSketchAndTopNFromStorage(reader *StatsReader, tblID int64, isIndex, histID int64) (_ *CMSketch, _ *TopN, err error) {
+	topNRows, _, err := reader.Read("select HIGH_PRIORITY value, count from mysql.stats_top_n where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
+	if err != nil {
+		return nil, nil, err
+	}
+	rows, _, err := reader.Read("select cm_sketch from mysql.stats_histograms where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
+	if err != nil {
+		return nil, nil, err
+	}
+	if len(rows) == 0 {
+		return DecodeCMSketchAndTopN(nil, topNRows)
+	}
+	return DecodeCMSketchAndTopN(rows[0].GetBytes(0), topNRows)
+}
+
+// FMSketchFromStorage reads FMSketch from storage
+func FMSketchFromStorage(reader *StatsReader, tblID int64, isIndex, histID int64) (_ *FMSketch, err error) {
+	rows, _, err := reader.Read("select value from mysql.stats_fm_sketch where table_id = %? and is_index = %? and hist_id = %?", tblID, isIndex, histID)
+	if err != nil || len(rows) == 0 {
+		return nil, err
+	}
+	return DecodeFMSketch(rows[0].GetBytes(0))
+}
+
+// ColumnCountFromStorage reads column count from storage
+func ColumnCountFromStorage(reader *StatsReader, tableID, colID, statsVer int64) (int64, error) {
+	count := int64(0)
+	rows, _, err := reader.Read("select sum(count) from mysql.stats_buckets where table_id = %? and is_index = 0 and hist_id = %?", tableID, colID)
+	if err != nil {
+		return 0, errors.Trace(err)
+	}
+	// If there doesn't exist any buckets, the SQL will return NULL. So we only use the result if it's not NULL.
+	if !rows[0].IsNull(0) {
+		count, err = rows[0].GetMyDecimal(0).ToInt()
+		if err != nil {
+			return 0, errors.Trace(err)
+		}
+	}
+
+	if statsVer >= Version2 {
+		// Before stats ver 2, histogram represents all data in this column.
+		// In stats ver 2, histogram + TopN represent all data in this column.
+		// So we need to add TopN total count here.
+		rows, _, err = reader.Read("select sum(count) from mysql.stats_top_n where table_id = %? and is_index = 0 and hist_id = %?", tableID, colID)
+		if err != nil {
+			return 0, errors.Trace(err)
+		}
+		if !rows[0].IsNull(0) {
+			topNCount, err := rows[0].GetMyDecimal(0).ToInt()
+			if err != nil {
+				return 0, errors.Trace(err)
+			}
+			count += topNCount
+		}
+	}
+	return count, err
+}
+
+// ExtendedStatsFromStorage reads extended stats from storage.
+func ExtendedStatsFromStorage(reader *StatsReader, table *Table, physicalID int64, loadAll bool) (*Table, error) {
+	failpoint.Inject("injectExtStatsLoadErr", func() {
+		failpoint.Return(nil, errors.New("gofail extendedStatsFromStorage error"))
+	})
+	lastVersion := uint64(0)
+	if table.ExtendedStats != nil && !loadAll {
+		lastVersion = table.ExtendedStats.LastUpdateVersion
+	} else {
+		table.ExtendedStats = NewExtendedStatsColl()
+	}
+	rows, _, err := reader.Read("select name, status, type, column_ids, stats, version from mysql.stats_extended where table_id = %? and status in (%?, %?, %?) and version > %?", physicalID, ExtendedStatsInited, ExtendedStatsAnalyzed, ExtendedStatsDeleted, lastVersion)
+	if err != nil || len(rows) == 0 {
+		return table, nil
+	}
+	for _, row := range rows {
+		lastVersion = mathutil.Max(lastVersion, row.GetUint64(5))
+		name := row.GetString(0)
+		status := uint8(row.GetInt64(1))
+		if status == ExtendedStatsDeleted || status == ExtendedStatsInited {
+			delete(table.ExtendedStats.Stats, name)
+		} else {
+			item := &ExtendedStatsItem{
+				Tp: uint8(row.GetInt64(2)),
+			}
+			colIDs := row.GetString(3)
+			err := json.Unmarshal([]byte(colIDs), &item.ColIDs)
+			if err != nil {
+				logutil.BgLogger().Error("[stats] decode column IDs failed", zap.String("column_ids", colIDs), zap.Error(err))
+				return nil, err
+			}
+			statsStr := row.GetString(4)
+			if item.Tp == ast.StatsTypeCardinality || item.Tp == ast.StatsTypeCorrelation {
+				if statsStr != "" {
+					item.ScalarVals, err = strconv.ParseFloat(statsStr, 64)
+					if err != nil {
+						logutil.BgLogger().Error("[stats] parse scalar stats failed", zap.String("stats", statsStr), zap.Error(err))
+						return nil, err
+					}
+				}
+			} else {
+				item.StringVals = statsStr
+			}
+			table.ExtendedStats.Stats[name] = item
+		}
+	}
+	table.ExtendedStats.LastUpdateVersion = lastVersion
+	return table, nil
+}
+
+func indexStatsFromStorage(reader *StatsReader, row chunk.Row, table *Table, tableInfo *model.TableInfo, loadAll bool) error {
+	histID := row.GetInt64(2)
+	distinct := row.GetInt64(3)
+	histVer := row.GetUint64(4)
+	nullCount := row.GetInt64(5)
+	statsVer := row.GetInt64(7)
+	idx := table.Indices[histID]
+	errorRate := ErrorRate{}
+	flag := row.GetInt64(8)
+	lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
+	if (!IsAnalyzed(flag) || reader.IsHistory()) && idx != nil {
+		errorRate = idx.ErrorRate
+	}
+	for _, idxInfo := range tableInfo.Indices {
+		if histID != idxInfo.ID {
+			continue
+		}
+		if idx == nil || idx.LastUpdateVersion < histVer {
+			hg, err := HistogramFromStorage(reader, table.PhysicalID, histID, types.NewFieldType(mysql.TypeBlob), distinct, 1, histVer, nullCount, 0, 0)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			cms, topN, err := CMSketchAndTopNFromStorage(reader, table.PhysicalID, 1, idxInfo.ID)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			var fmSketch *FMSketch
+			if loadAll {
+				// FMSketch is only used when merging partition stats into global stats. When merging partition stats into global stats,
+				// we load all the statistics, i.e., loadAll is true.
+				fmSketch, err = FMSketchFromStorage(reader, table.PhysicalID, 1, histID)
+				if err != nil {
+					return errors.Trace(err)
+				}
+			}
+			idx = &Index{
+				Histogram:  *hg,
+				CMSketch:   cms,
+				TopN:       topN,
+				FMSketch:   fmSketch,
+				Info:       idxInfo,
+				ErrorRate:  errorRate,
+				StatsVer:   statsVer,
+				Flag:       flag,
+				PhysicalID: table.PhysicalID,
+			}
+			if statsVer != Version0 {
+				idx.StatsLoadedStatus = NewStatsFullLoadStatus()
+			}
+			lastAnalyzePos.Copy(&idx.LastAnalyzePos)
+		}
+		break
+	}
+	if idx != nil {
+		table.Indices[histID] = idx
+	} else {
+		logutil.BgLogger().Debug("we cannot find index id in table info. It may be deleted.", zap.Int64("indexID", histID), zap.String("table", tableInfo.Name.O))
+	}
+	return nil
+}
+
+func columnStatsFromStorage(reader *StatsReader, row chunk.Row, table *Table, tableInfo *model.TableInfo, loadAll bool, lease time.Duration) error {
+	histID := row.GetInt64(2)
+	distinct := row.GetInt64(3)
+	histVer := row.GetUint64(4)
+	nullCount := row.GetInt64(5)
+	totColSize := row.GetInt64(6)
+	statsVer := row.GetInt64(7)
+	correlation := row.GetFloat64(9)
+	lastAnalyzePos := row.GetDatum(10, types.NewFieldType(mysql.TypeBlob))
+	col := table.Columns[histID]
+	errorRate := ErrorRate{}
+	flag := row.GetInt64(8)
+	if (!IsAnalyzed(flag) || reader.IsHistory()) && col != nil {
+		errorRate = col.ErrorRate
+	}
+	for _, colInfo := range tableInfo.Columns {
+		if histID != colInfo.ID {
+			continue
+		}
+		isHandle := tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag())
+		// We will not load buckets if:
+		// 1. lease > 0, and:
+		// 2. this column is not handle, and:
+		// 3. the column doesn't has any statistics before, and:
+		// 4. loadAll is false.
+		//
+		// Here is the explanation of the condition `!col.IsStatsInitialized() || col.IsAllEvicted()`.
+		// For one column:
+		// 1. If there is no stats for it in the storage(i.e., analyze has never been executed before), then its stats status
+		//    would be `!col.IsStatsInitialized()`. In this case we should go the `notNeedLoad` path.
+		// 2. If there exists stats for it in the storage but its stats status is `col.IsAllEvicted()`, there are two
+		//    sub cases for this case. One is that the column stats have never been used/needed by the optimizer so they have
+		//    never been loaded. The other is that the column stats were loaded and then evicted. For the both sub cases,
+		//    we should go the `notNeedLoad` path.
+		// 3. If some parts(Histogram/TopN/CMSketch) of stats for it exist in TiDB memory currently, we choose to load all of
+		//    its new stats once we find stats version is updated.
+		notNeedLoad := lease > 0 &&
+			!isHandle &&
+			(col == nil || ((!col.IsStatsInitialized() || col.IsAllEvicted()) && col.LastUpdateVersion < histVer)) &&
+			!loadAll
+		// Here is
+		//For one column, if there is no stats for it in the storage(analyze is never)
+		if notNeedLoad {
+			count, err := ColumnCountFromStorage(reader, table.PhysicalID, histID, statsVer)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			col = &Column{
+				PhysicalID: table.PhysicalID,
+				Histogram:  *NewHistogram(histID, distinct, nullCount, histVer, &colInfo.FieldType, 0, totColSize),
+				Info:       colInfo,
+				Count:      count + nullCount,
+				ErrorRate:  errorRate,
+				IsHandle:   tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
+				Flag:       flag,
+				StatsVer:   statsVer,
+			}
+			// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
+			// So we need add col.Count > 0 here.
+			if statsVer != Version0 || col.Count > 0 {
+				col.StatsLoadedStatus = NewStatsAllEvictedStatus()
+			}
+			lastAnalyzePos.Copy(&col.LastAnalyzePos)
+			col.Histogram.Correlation = correlation
+			break
+		}
+		if col == nil || col.LastUpdateVersion < histVer || loadAll {
+			hg, err := HistogramFromStorage(reader, table.PhysicalID, histID, &colInfo.FieldType, distinct, 0, histVer, nullCount, totColSize, correlation)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			cms, topN, err := CMSketchAndTopNFromStorage(reader, table.PhysicalID, 0, colInfo.ID)
+			if err != nil {
+				return errors.Trace(err)
+			}
+			var fmSketch *FMSketch
+			if loadAll {
+				// FMSketch is only used when merging partition stats into global stats. When merging partition stats into global stats,
+				// we load all the statistics, i.e., loadAll is true.
+				fmSketch, err = FMSketchFromStorage(reader, table.PhysicalID, 0, histID)
+				if err != nil {
+					return errors.Trace(err)
+				}
+			}
+			col = &Column{
+				PhysicalID: table.PhysicalID,
+				Histogram:  *hg,
+				Info:       colInfo,
+				CMSketch:   cms,
+				TopN:       topN,
+				FMSketch:   fmSketch,
+				ErrorRate:  errorRate,
+				IsHandle:   tableInfo.PKIsHandle && mysql.HasPriKeyFlag(colInfo.GetFlag()),
+				Flag:       flag,
+				StatsVer:   statsVer,
+			}
+			// Column.Count is calculated by Column.TotalRowCount(). Hence we don't set Column.Count when initializing col.
+			col.Count = int64(col.TotalRowCount())
+			// When adding/modifying a column, we create its stats(all values are default values) without setting stats_ver.
+			// So we need add colHist.Count > 0 here.
+			if statsVer != Version0 || col.Count > 0 {
+				col.StatsLoadedStatus = NewStatsFullLoadStatus()
+			}
+			lastAnalyzePos.Copy(&col.LastAnalyzePos)
+			break
+		}
+		if col.TotColSize != totColSize {
+			newCol := *col
+			newCol.TotColSize = totColSize
+			col = &newCol
+		}
+		break
+	}
+	if col != nil {
+		table.Columns[col.ID] = col
+	} else {
+		// If we didn't find a Column or Index in tableInfo, we won't load the histogram for it.
+		// But don't worry, next lease the ddl will be updated, and we will load a same table for two times to
+		// avoid error.
+		logutil.BgLogger().Debug("we cannot find column in table info now. It may be deleted", zap.Int64("colID", histID), zap.String("table", tableInfo.Name.O))
+	}
+	return nil
+}
+
+// TableStatsFromStorage loads table stats info from storage.
+func TableStatsFromStorage(reader *StatsReader, tableInfo *model.TableInfo, physicalID int64, loadAll bool, lease time.Duration, table *Table) (_ *Table, err error) {
+	// If table stats is pseudo, we also need to copy it, since we will use the column stats when
+	// the average error rate of it is small.
+	if table == nil || reader.IsHistory() {
+		histColl := HistColl{
+			PhysicalID:     physicalID,
+			HavePhysicalID: true,
+			Columns:        make(map[int64]*Column, len(tableInfo.Columns)),
+			Indices:        make(map[int64]*Index, len(tableInfo.Indices)),
+		}
+		table = &Table{
+			HistColl: histColl,
+		}
+	} else {
+		// We copy it before writing to avoid race.
+		table = table.Copy()
+	}
+	table.Pseudo = false
+
+	rows, _, err := reader.Read("select modify_count, count from mysql.stats_meta where table_id = %?", physicalID)
+	if err != nil || len(rows) == 0 {
+		return nil, err
+	}
+	table.ModifyCount = rows[0].GetInt64(0)
+	table.Count = rows[0].GetInt64(1)
+
+	rows, _, err = reader.Read("select table_id, is_index, hist_id, distinct_count, version, null_count, tot_col_size, stats_ver, flag, correlation, last_analyze_pos from mysql.stats_histograms where table_id = %?", physicalID)
+	// Check deleted table.
+	if err != nil || len(rows) == 0 {
+		return nil, nil
+	}
+	for _, row := range rows {
+		if row.GetInt64(1) > 0 {
+			err = indexStatsFromStorage(reader, row, table, tableInfo, loadAll)
+		} else {
+			err = columnStatsFromStorage(reader, row, table, tableInfo, loadAll, lease)
+		}
+		if err != nil {
+			return nil, err
+		}
+	}
+	return ExtendedStatsFromStorage(reader, table, physicalID, loadAll)
+}