sql/stats: include partial stats in results of statsCache.GetTableStats #95145

Merged: 1 commit (merged Jan 20, 2023)

5 changes: 1 addition & 4 deletions pkg/ccl/backupccl/testdata/backup-restore/metadata
@@ -53,9 +53,6 @@ exec-sql
 BACKUP DATABASE db1 INTO 'nodelocal://0/test/'
 ----
 
-# TODO(ssd): The expectation here is 6 stats rather than 7 because the
-# 'partial' stat is not backed up even though it is persisted. I think
-# we may want a different API for fetching statistics.
 query-sql
 SELECT
   json_array_length(
@@ -72,4 +69,4 @@ SELECT
     -> 'statistics'
   )
 ----
-6
+7
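
The expectation moves from 6 to 7 because statsCache.GetTableStats now returns partial statistics alongside full ones, so the backed-up statistics JSON picks up the extra 'partial' stat. Callers that only want full statistics now filter the result themselves. A minimal sketch of such filtering, using a simplified stand-in type and a hypothetical fullStatsOnly helper (neither is part of this PR):

package main

import "fmt"

// TableStatistic is a simplified stand-in for the real stats.TableStatistic;
// only the fields needed for this sketch are shown.
type TableStatistic struct {
  Name             string
  PartialPredicate string // non-empty for partial stats
}

// fullStatsOnly drops partial statistics, mimicking what a caller of
// statsCache.GetTableStats can do now that partial stats are included in the
// returned slice.
func fullStatsOnly(all []*TableStatistic) []*TableStatistic {
  full := make([]*TableStatistic, 0, len(all))
  for _, s := range all {
    if s.PartialPredicate == "" {
      full = append(full, s)
    }
  }
  return full
}

func main() {
  all := []*TableStatistic{
    {Name: "__auto__"},
    {Name: "partial_scan", PartialPredicate: "x > 10"},
  }
  fmt.Println(len(all), len(fullStatsOnly(all))) // 2 1
}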
25 changes: 14 additions & 11 deletions pkg/sql/distsql_plan_stats.go
@@ -223,19 +223,22 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
   // column that is not partial and not forecasted. The first one we find will
   // be the latest due to the newest to oldest ordering property of the cache.
   for _, t := range tableStats {
-    // TODO (faizaanmadhani): Ideally, we don't want to verify that
-    // a statistic is forecasted or merged based on the name because
-    // someone could create a statistic named __forecast__ or __merged__.
-    // Update system.table_statistics to add an enum to indicate which
-    // type of statistic it is.
-    if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] && t.PartialPredicate == "" &&
-      t.Name != jobspb.ForecastStatsName &&
-      t.Name != jobspb.MergedStatsName {
+    if len(t.ColumnIDs) == 1 && column.GetID() == t.ColumnIDs[0] &&
+      !t.IsPartial() && !t.IsMerged() && !t.IsForecast() {
       if t.HistogramData == nil || t.HistogramData.ColumnType == nil || len(t.Histogram) == 0 {
-        return nil, pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, "the latest full statistic for column %s has no histogram", column.GetName())
+        return nil, pgerror.Newf(
+          pgcode.ObjectNotInPrerequisiteState,
+          "the latest full statistic for column %s has no histogram",
+          column.GetName(),
+        )
       }
-      if colinfo.ColumnTypeIsInvertedIndexable(column.GetType()) && t.HistogramData.ColumnType.Family() == types.BytesFamily {
-        return nil, pgerror.Newf(pgcode.ObjectNotInPrerequisiteState, "the latest full statistic histogram for column %s is an inverted index histogram", column.GetName())
+      if colinfo.ColumnTypeIsInvertedIndexable(column.GetType()) &&
+        t.HistogramData.ColumnType.Family() == types.BytesFamily {
+        return nil, pgerror.Newf(
+          pgcode.ObjectNotInPrerequisiteState,
+          "the latest full statistic histogram for column %s is an inverted index histogram",
+          column.GetName(),
+        )
       }
       stat = t
       histogram = t.Histogram
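
The new IsPartial/IsMerged/IsForecast checks replace the name- and predicate-based tests that used to live at this call site. The excerpt does not show how those methods are implemented on the stats side; one plausible implementation, sketched from the fields the removed code relied on (PartialPredicate plus the reserved __auto__, __forecast__, and __merged__ names mentioned in the removed TODO), could look like the following. The type and constants below are simplified stand-ins, not the real cockroach definitions:

package sketch

// TableStatisticProto is a simplified stand-in; only the fields used by the
// predicates are shown.
type TableStatisticProto struct {
  Name             string
  PartialPredicate string
}

const (
  autoStatsName     = "__auto__"
  forecastStatsName = "__forecast__"
  mergedStatsName   = "__merged__"
)

// IsPartial reports whether the statistic was collected with a WHERE clause.
func (p *TableStatisticProto) IsPartial() bool { return p.PartialPredicate != "" }

// IsMerged reports whether the statistic was created by merging a partial and
// a full statistic.
func (p *TableStatisticProto) IsMerged() bool { return p.Name == mergedStatsName }

// IsForecast reports whether the statistic was created by forecasting.
func (p *TableStatisticProto) IsForecast() bool { return p.Name == forecastStatsName }

// IsAuto reports whether the statistic was collected automatically.
func (p *TableStatisticProto) IsAuto() bool { return p.Name == autoStatsName }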
15 changes: 14 additions & 1 deletion pkg/sql/opt/cat/table.go
@@ -228,8 +228,21 @@ type TableStatistic interface {
   // inverted index histograms, this will always return types.Bytes.
   HistogramType() *types.T
 
-  // IsForecast returns true if this statistic is a forecast.
+  // IsPartial returns true if this statistic was collected with a where
+  // clause. (If the where clause was something like "WHERE 1 = 1" or "WHERE
+  // true" this could technically be a full statistic rather than a partial
+  // statistic, but this function does not check for this.)
+  IsPartial() bool
+
+  // IsMerged returns true if this statistic was created by merging a partial
+  // and a full statistic.
+  IsMerged() bool
+
+  // IsForecast returns true if this statistic was created by forecasting.
   IsForecast() bool
+
+  // IsAuto returns true if this statistic was collected automatically.
+  IsAuto() bool
 }
 
 // HistogramBucket contains the data for a single histogram bucket. Note
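
With the four new predicates on the interface, the "skip ahead to the newest usable statistic" pattern that the optimizer changes below repeat can be written once. A sketch against a reduced version of the interface; firstFullStatIdx is a hypothetical helper, not something added by this PR:

package sketch

// TableStatistic is reduced here to the two predicates the helper needs; the
// real cat.TableStatistic interface carries many more methods.
type TableStatistic interface {
  IsPartial() bool
  IsForecast() bool
}

// firstFullStatIdx returns the index of the newest statistic that is neither
// partial nor (when forecasts are disabled) a forecast. Statistics are assumed
// to be ordered newest first, matching the stats cache. It returns len(stats)
// when no such statistic exists, so callers must check the bound.
func firstFullStatIdx(stats []TableStatistic, useForecasts bool) int {
  i := 0
  for i < len(stats) &&
    (stats[i].IsPartial() || (stats[i].IsForecast() && !useForecasts)) {
    i++
  }
  return i
}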
24 changes: 14 additions & 10 deletions pkg/sql/opt/exec/execbuilder/relational.go
@@ -403,12 +403,12 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
   if scan, ok := e.(*memo.ScanExpr); ok {
     tab := b.mem.Metadata().Table(scan.Table)
     if tab.StatisticCount() > 0 {
-      // The first stat is the most recent one.
+      // The first stat is the most recent full one.
       var first int
-      if !b.evalCtx.SessionData().OptimizerUseForecasts {
-        for first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
-          first++
-        }
+      for first < tab.StatisticCount() &&
+        tab.Statistic(first).IsPartial() ||
+        (tab.Statistic(first).IsForecast() && !b.evalCtx.SessionData().OptimizerUseForecasts) {
+        first++
       }
 
       if first < tab.StatisticCount() {
@@ -422,10 +422,10 @@ func (b *Builder) maybeAnnotateWithEstimates(node exec.Node, e memo.RelExpr) {
         val.Forecast = stat.IsForecast()
         if val.Forecast {
           val.ForecastAt = stat.CreatedAt()
-          // Find the first non-forecast stat.
+          // Find the first non-forecast full stat.
           for i := first + 1; i < tab.StatisticCount(); i++ {
             nextStat := tab.Statistic(i)
-            if !nextStat.IsForecast() {
+            if !nextStat.IsPartial() && !nextStat.IsForecast() {
               val.TableStatsCreatedAt = nextStat.CreatedAt()
               break
             }
@@ -760,8 +760,11 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (execPlan, error) {
   b.TotalScanRows += stats.RowCount
   b.ScanCounts[exec.ScanWithStatsCount]++
 
-  // The first stat is the most recent one. Check if it was a forecast.
+  // The first stat is the most recent full one. Check if it was a forecast.
   var first int
+  for first < tab.StatisticCount() && tab.Statistic(first).IsPartial() {
+    first++
+  }
   if first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
     if b.evalCtx.SessionData().OptimizerUseForecasts {
       b.ScanCounts[exec.ScanWithStatsForecastCount]++
@@ -772,8 +775,9 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (execPlan, error) {
       b.NanosSinceStatsForecasted = nanosSinceStatsForecasted
     }
   }
-  // Find the first non-forecast stat.
-  for first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
+  // Find the first non-forecast full stat.
+  for first < tab.StatisticCount() &&
+    (tab.Statistic(first).IsPartial() || tab.Statistic(first).IsForecast()) {
     first++
   }
 }
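
To see why the loops above must skip partial statistics: partial stats are often the newest entries in the cache, but they describe only part of the table, so blindly taking the first entry would annotate EXPLAIN output with a misleading row count. A small self-contained illustration with fake stats (not CockroachDB code):

package main

import "fmt"

type fakeStat struct {
  rowCount int
  partial  bool
  forecast bool
}

// pickStat mirrors the selection logic added in this PR: skip partial stats,
// and skip forecasts when forecasting is disabled.
func pickStat(stats []fakeStat, useForecasts bool) (fakeStat, bool) {
  for _, s := range stats {
    if s.partial || (s.forecast && !useForecasts) {
      continue
    }
    return s, true
  }
  return fakeStat{}, false
}

func main() {
  // Newest first: a partial stat over a small span, then a full stat.
  stats := []fakeStat{
    {rowCount: 100, partial: true},
    {rowCount: 1_000_000},
  }
  if s, ok := pickStat(stats, false); ok {
    fmt.Println(s.rowCount) // 1000000, not 100
  }
}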
13 changes: 8 additions & 5 deletions pkg/sql/opt/memo/statistics_builder.go
@@ -614,12 +614,12 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
   // Make now and annotate the metadata table with it for next time.
   stats = &props.Statistics{}
 
-  // Find the most recent statistic. (Stats are ordered with most recent first.)
+  // Find the most recent full statistic. (Stats are ordered with most recent first.)
   var first int
-  if !sb.evalCtx.SessionData().OptimizerUseForecasts {
-    for first < tab.StatisticCount() && tab.Statistic(first).IsForecast() {
-      first++
-    }
+  for first < tab.StatisticCount() &&
+    (tab.Statistic(first).IsPartial() ||
+      tab.Statistic(first).IsForecast() && !sb.evalCtx.SessionData().OptimizerUseForecasts) {
+    first++
   }
 
   if first >= tab.StatisticCount() {
@@ -639,6 +639,9 @@ func (sb *statisticsBuilder) makeTableStatistics(tabID opt.TableID) *props.Stati
   // column set. Stats are ordered with most recent first.
   for i := first; i < tab.StatisticCount(); i++ {
     stat := tab.Statistic(i)
+    if stat.IsPartial() {
+      continue
+    }
     if stat.IsForecast() && !sb.evalCtx.SessionData().OptimizerUseForecasts {
       continue
     }
1 change: 0 additions & 1 deletion pkg/sql/opt/testutils/testcat/BUILD.bazel
@@ -23,7 +23,6 @@ go_library(
     deps = [
         "//pkg/config/zonepb",
         "//pkg/geo/geoindex",
-        "//pkg/jobs/jobspb",
         "//pkg/roachpb",
         "//pkg/security/username",
         "//pkg/settings/cluster",
18 changes: 16 additions & 2 deletions pkg/sql/opt/testutils/testcat/test_catalog.go
@@ -18,7 +18,6 @@ import (
 
   "github.com/cockroachdb/cockroach/pkg/config/zonepb"
   "github.com/cockroachdb/cockroach/pkg/geo/geoindex"
-  "github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
   "github.com/cockroachdb/cockroach/pkg/roachpb"
   "github.com/cockroachdb/cockroach/pkg/security/username"
   "github.com/cockroachdb/cockroach/pkg/settings/cluster"
@@ -1294,9 +1293,24 @@ func (ts *TableStat) HistogramType() *types.T {
   return ts.histogramType
 }
 
+// IsPartial is part of the cat.TableStatistic interface.
+func (ts *TableStat) IsPartial() bool {
+  return ts.js.IsPartial()
+}
+
+// IsMerged is part of the cat.TableStatistic interface.
+func (ts *TableStat) IsMerged() bool {
+  return ts.js.IsMerged()
+}
+
 // IsForecast is part of the cat.TableStatistic interface.
 func (ts *TableStat) IsForecast() bool {
-  return ts.js.Name == jobspb.ForecastStatsName
+  return ts.js.IsForecast()
 }
+
+// IsAuto is part of the cat.TableStatistic interface.
+func (ts *TableStat) IsAuto() bool {
+  return ts.js.IsAuto()
+}
 
 // TableStats is a slice of TableStat pointers.
18 changes: 16 additions & 2 deletions pkg/sql/opt_catalog.go
@@ -17,7 +17,6 @@ import (
 
   "github.com/cockroachdb/cockroach/pkg/config"
   "github.com/cockroachdb/cockroach/pkg/geo/geoindex"
-  "github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
   "github.com/cockroachdb/cockroach/pkg/keys"
   "github.com/cockroachdb/cockroach/pkg/kv"
   "github.com/cockroachdb/cockroach/pkg/roachpb"
@@ -1773,9 +1772,24 @@ func (os *optTableStat) HistogramType() *types.T {
   return os.stat.HistogramData.ColumnType
 }
 
+// IsPartial is part of the cat.TableStatistic interface.
+func (os *optTableStat) IsPartial() bool {
+  return os.stat.IsPartial()
+}
+
+// IsMerged is part of the cat.TableStatistic interface.
+func (os *optTableStat) IsMerged() bool {
+  return os.stat.IsMerged()
+}
+
 // IsForecast is part of the cat.TableStatistic interface.
 func (os *optTableStat) IsForecast() bool {
-  return os.stat.Name == jobspb.ForecastStatsName
+  return os.stat.IsForecast()
 }
+
+// IsAuto is part of the cat.TableStatistic interface.
+func (os *optTableStat) IsAuto() bool {
+  return os.stat.IsAuto()
+}
 
 // optFamily is a wrapper around descpb.ColumnFamilyDescriptor that keeps a
63 changes: 25 additions & 38 deletions pkg/sql/show_stats.go
@@ -14,7 +14,6 @@ import (
   "context"
   encjson "encoding/json"
   "fmt"
-  "sort"
 
   "github.com/cockroachdb/cockroach/pkg/clusterversion"
   "github.com/cockroachdb/cockroach/pkg/sql/catalog"
@@ -194,10 +193,8 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p
 
   _, withMerge := opts[showTableStatsOptMerge]
   _, withForecast := opts[showTableStatsOptForecast]
-
-  obsFullStats := make([]*stats.TableStatistic, 0, len(rows))
-  obsPartialStats := make([]*stats.TableStatistic, 0, len(rows))
   if withMerge || withForecast {
+    statsList := make([]*stats.TableStatistic, 0, len(rows))
     for _, row := range rows {
       // Skip stats on dropped columns.
       colIDs := row[columnIDsIdx].(*tree.DArray).Array
@@ -215,48 +212,38 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p
           return nil, err
         }
       }
-      if obs.PartialPredicate != "" {
-        obsPartialStats = append(obsPartialStats, obs)
-      } else {
-        obsFullStats = append(obsFullStats, obs)
-      }
+      statsList = append(statsList, obs)
     }
 
-    // Reverse the lists to sort by CreatedAt descending.
-    for i := 0; i < len(obsFullStats)/2; i++ {
-      j := len(obsFullStats) - i - 1
-      obsFullStats[i], obsFullStats[j] = obsFullStats[j], obsFullStats[i]
-    }
-    for i := 0; i < len(obsPartialStats)/2; i++ {
-      j := len(obsPartialStats) - i - 1
-      obsPartialStats[i], obsPartialStats[j] = obsPartialStats[j], obsPartialStats[i]
-    }
-  }
-
-  if withMerge {
-    merged := stats.MergedStatistics(ctx, obsPartialStats, obsFullStats)
-    obsFullStats = append(obsFullStats, merged...)
-    sort.Slice(obsFullStats, func(i, j int) bool {
-      return obsFullStats[i].CreatedAt.After(obsFullStats[j].CreatedAt)
-    })
-    for i := len(merged) - 1; i >= 0; i-- {
-      mergedRow, err := tableStatisticProtoToRow(&merged[i].TableStatisticProto, partialStatsVerActive)
-      if err != nil {
-        return nil, err
-      }
-      rows = append(rows, mergedRow)
-    }
-  }
-
-  if withForecast {
-    forecasts := stats.ForecastTableStatistics(ctx, obsFullStats)
-    // Iterate in reverse order to match the ORDER BY "columnIDs".
-    for i := len(forecasts) - 1; i >= 0; i-- {
-      forecastRow, err := tableStatisticProtoToRow(&forecasts[i].TableStatisticProto, partialStatsVerActive)
-      if err != nil {
-        return nil, err
-      }
-      rows = append(rows, forecastRow)
-    }
-  }
+    // Reverse the list to sort by CreatedAt descending.
+    for i := 0; i < len(statsList)/2; i++ {
+      j := len(statsList) - i - 1
+      statsList[i], statsList[j] = statsList[j], statsList[i]
+    }
+
+    if withMerge {
+      merged := stats.MergedStatistics(ctx, statsList)
+      statsList = append(merged, statsList...)
+      // Iterate in reverse order to match the ORDER BY "columnIDs".
+      for i := len(merged) - 1; i >= 0; i-- {
+        mergedRow, err := tableStatisticProtoToRow(&merged[i].TableStatisticProto, partialStatsVerActive)
+        if err != nil {
+          return nil, err
+        }
+        rows = append(rows, mergedRow)
+      }
+    }
+
+    if withForecast {
+      forecasts := stats.ForecastTableStatistics(ctx, statsList)
+      // Iterate in reverse order to match the ORDER BY "columnIDs".
+      for i := len(forecasts) - 1; i >= 0; i-- {
+        forecastRow, err := tableStatisticProtoToRow(&forecasts[i].TableStatisticProto, partialStatsVerActive)
+        if err != nil {
+          return nil, err
+        }
+        rows = append(rows, forecastRow)
+      }
+    }
+  }
 
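
The rewritten ShowTableStats reads the observed statistics in ascending CreatedAt order (matching the query's ORDER BY), reverses the list so the newest stat comes first, which is the ordering MergedStatistics and ForecastTableStatistics expect, and prepends merged stats because they are derived from the newest data. A toy illustration of that ordering idiom (illustrative only, using strings in place of statistics):

package main

import "fmt"

func main() {
  // Observed stats come back in ascending CreatedAt order; reverse in place
  // so the newest is first.
  statsList := []string{"09:00", "10:00", "11:00"}
  for i := 0; i < len(statsList)/2; i++ {
    j := len(statsList) - i - 1
    statsList[i], statsList[j] = statsList[j], statsList[i]
  }

  // Merged stats are derived from the newest data, so they are prepended.
  merged := []string{"merged@11:05"}
  statsList = append(merged, statsList...)
  fmt.Println(statsList) // [merged@11:05 11:00 10:00 09:00]
}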
15 changes: 7 additions & 8 deletions pkg/sql/stats/automatic_stats.go
@@ -706,7 +706,7 @@ func (r *Refresher) maybeRefreshStats(
   // refresh happens at exactly 2x the current average, and the average
   // refresh time is calculated from the most recent 4 refreshes. See the
   // comment in stats/delete_stats.go.)
-  maxTimeBetweenRefreshes := stat.CreatedAt.Add(2*avgRefreshTime(tableStats) + r.extraTime)
+  maxTimeBetweenRefreshes := stat.CreatedAt.Add(2*avgFullRefreshTime(tableStats) + r.extraTime)
   if timeutil.Now().After(maxTimeBetweenRefreshes) {
     mustRefresh = true
   }
@@ -789,21 +789,20 @@ func mostRecentAutomaticStat(tableStats []*TableStatistic) *TableStatistic {
   return nil
 }
 
-// avgRefreshTime returns the average time between automatic statistics
+// avgFullRefreshTime returns the average time between automatic full statistics
 // refreshes given a list of tableStats from one table. It does so by finding
-// the most recent automatically generated statistic (identified by the name
-// AutoStatsName), and then finds all previously generated automatic stats on
-// those same columns. The average is calculated as the average time between
-// each consecutive stat.
+// the most recent automatically generated statistic and then finds all
+// previously generated automatic stats on those same columns. The average is
+// calculated as the average time between each consecutive stat.
 //
 // If there are not at least two automatically generated statistics on the same
 // columns, the default value defaultAverageTimeBetweenRefreshes is returned.
-func avgRefreshTime(tableStats []*TableStatistic) time.Duration {
+func avgFullRefreshTime(tableStats []*TableStatistic) time.Duration {
   var reference *TableStatistic
   var sum time.Duration
   var count int
   for _, stat := range tableStats {
-    if stat.Name != jobspb.AutoStatsName {
+    if !stat.IsAuto() || stat.IsPartial() {
       continue
     }
     if reference == nil {
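
avgFullRefreshTime now looks only at automatically collected, non-partial statistics when estimating the refresh cadence; counting partial stats could shrink the apparent time between refreshes and make automatic full refreshes fire more often than intended. A simplified sketch of the calculation (the real implementation also matches column IDs against the most recent auto stat, which this sketch omits, and the default value here is illustrative):

package main

import (
  "fmt"
  "time"
)

type stat struct {
  createdAt time.Time
  auto      bool
  partial   bool
}

const defaultAverageTimeBetweenRefreshes = 12 * time.Hour // illustrative value

// avgFullRefreshTime averages the gaps between consecutive automatic full
// stats (assumed ordered newest first). With fewer than two qualifying stats
// it falls back to the default, mirroring the behavior described above.
func avgFullRefreshTime(stats []stat) time.Duration {
  var prev *stat
  var sum time.Duration
  var count int
  for i := range stats {
    s := &stats[i]
    if !s.auto || s.partial {
      continue
    }
    if prev != nil {
      sum += prev.createdAt.Sub(s.createdAt)
      count++
    }
    prev = s
  }
  if count == 0 {
    return defaultAverageTimeBetweenRefreshes
  }
  return sum / time.Duration(count)
}

func main() {
  now := time.Now()
  stats := []stat{
    {createdAt: now, auto: true},
    {createdAt: now.Add(-1 * time.Hour), partial: true}, // ignored
    {createdAt: now.Add(-2 * time.Hour), auto: true},
    {createdAt: now.Add(-4 * time.Hour), auto: true},
  }
  fmt.Println(avgFullRefreshTime(stats)) // 2h0m0s
}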
8 changes: 4 additions & 4 deletions pkg/sql/stats/automatic_stats_test.go
@@ -316,8 +316,8 @@ func TestAverageRefreshTime(t *testing.T) {
     if err != nil {
       return err
     }
-    if actual := avgRefreshTime(stats).Round(time.Minute); actual != expected {
-      return fmt.Errorf("expected avgRefreshTime %s but found %s",
+    if actual := avgFullRefreshTime(stats).Round(time.Minute); actual != expected {
+      return fmt.Errorf("expected avgFullRefreshTime %s but found %s",
         expected.String(), actual.String())
     }
     return nil
@@ -350,7 +350,7 @@ func TestAverageRefreshTime(t *testing.T) {
     })
   }
 
-  // Since there are no stats yet, avgRefreshTime should return the default
+  // Since there are no stats yet, avgFullRefreshTime should return the default
   // value.
   if err := checkAverageRefreshTime(defaultAverageTimeBetweenRefreshes); err != nil {
     t.Fatal(err)
@@ -410,7 +410,7 @@ func TestAverageRefreshTime(t *testing.T) {
     t.Fatal(err)
   }
 
-  // None of the stats have the name AutoStatsName, so avgRefreshTime
+  // None of the stats have the name AutoStatsName, so avgFullRefreshTime
   // should still return the default value.
   if err := checkAverageRefreshTime(defaultAverageTimeBetweenRefreshes); err != nil {
     t.Fatal(err)