Skip to content

Commit

Permalink
sql: add SHOW STATISTICS WITH FORECAST
Browse files Browse the repository at this point in the history
Add a new WITH FORECAST option to SHOW STATISTICS which calculates and
displays forecasted statistics along with the existing table statistics.

Also, forbid injecting forecasted stats.

Assists: cockroachdb#79872

Release note (sql change): Add a new WITH FORECAST option to SHOW
STATISTICS which calculates and displays forecasted statistics along
with the existing table statistics.
  • Loading branch information
michae2 committed Aug 12, 2022
1 parent aa7fa4f commit f86a743
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 24 deletions.
2 changes: 1 addition & 1 deletion docs/generated/sql/bnf/show_stats.bnf
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
show_stats_stmt ::=
'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name
'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name opt_with_options
2 changes: 1 addition & 1 deletion docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -877,7 +877,7 @@ show_sessions_stmt ::=
| 'SHOW' 'ALL' opt_cluster 'SESSIONS'

show_stats_stmt ::=
'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name
'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name opt_with_options

show_tables_stmt ::=
'SHOW' 'TABLES' 'FROM' name '.' name with_comment
Expand Down
12 changes: 12 additions & 0 deletions pkg/sql/alter_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/security/username"
Expand Down Expand Up @@ -1279,6 +1280,17 @@ func injectTableStats(
return err
}

// Check that we're not injecting any forecasted stats.
for i := range jsonStats {
if jsonStats[i].Name == jobspb.ForecastStatsName {
return errors.WithHintf(
pgerror.New(pgcode.InvalidName, "cannot inject forecasted statistics"),
"either remove forecasts from the statement, or rename them from %q to something else",
jobspb.ForecastStatsName,
)
}
}

// First, delete all statistics for the table.
if _ /* rows */, err := params.extendedEvalCtx.ExecCfg.InternalExecutor.Exec(
params.ctx,
Expand Down
58 changes: 58 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,64 @@ s4 {b,c}
s4 {c,d}
s8 {a}

# Try forecasting stats.
query TT colnames
SELECT statistics_name, column_names
FROM [SHOW STATISTICS FOR TABLE data WITH FORECAST]
ORDER BY statistics_name, column_names::STRING
----
statistics_name column_names
__auto__ {a,b,c,d}
__auto__ {a,b,c,d}
__auto__ {a,b,c,d}
__auto__ {a,b,c,d}
__auto__ {a,b,c,d}
__auto__ {a,b,c}
__auto__ {a,b,c}
__auto__ {a,b,c}
__auto__ {a,b,c}
__auto__ {a,b,c}
__auto__ {a,b}
__auto__ {a,b}
__auto__ {a,b}
__auto__ {a,b}
__auto__ {a,b}
__auto__ {a}
__auto__ {a}
__auto__ {a}
__auto__ {a}
__auto__ {b}
__auto__ {b}
__auto__ {b}
__auto__ {b}
__auto__ {b}
__auto__ {c}
__auto__ {c}
__auto__ {c}
__auto__ {c}
__auto__ {c}
__auto__ {d}
__auto__ {d}
__auto__ {d}
__auto__ {d}
__auto__ {d}
__auto__ {e}
__auto__ {e}
__auto__ {e}
__auto__ {e}
__auto__ {e}
__forecast__ {a,b,c,d}
__forecast__ {a,b,c}
__forecast__ {a,b}
__forecast__ {a}
__forecast__ {b}
__forecast__ {c}
__forecast__ {d}
__forecast__ {e}
s4 {b,c}
s4 {c,d}
s8 {a}

# Test deletion of old non-default stats.

statement ok
Expand Down
17 changes: 13 additions & 4 deletions pkg/sql/parser/sql.y
Original file line number Diff line number Diff line change
Expand Up @@ -6473,17 +6473,26 @@ session_var_parts:
// are encoded in JSON format.
// %SeeAlso: SHOW HISTOGRAM
show_stats_stmt:
SHOW STATISTICS FOR TABLE table_name
SHOW STATISTICS FOR TABLE table_name opt_with_options
{
$$.val = &tree.ShowTableStats{Table: $5.unresolvedObjectName()}
$$.val = &tree.ShowTableStats{
Table: $5.unresolvedObjectName(),
Options: $6.kvOptions(),
}
}
| SHOW STATISTICS USING JSON FOR TABLE table_name
| SHOW STATISTICS USING JSON FOR TABLE table_name opt_with_options
{
/* SKIP DOC */
$$.val = &tree.ShowTableStats{Table: $7.unresolvedObjectName(), UsingJSON: true}
$$.val = &tree.ShowTableStats{
Table: $7.unresolvedObjectName(),
UsingJSON: true,
Options: $8.kvOptions(),
}
}
| SHOW STATISTICS error // SHOW HELP: SHOW STATISTICS
// %Help: SHOW HISTOGRAM - display histogram (experimental)
// %Category: Experimental
// %Text: SHOW HISTOGRAM <histogram_id>
Expand Down
5 changes: 5 additions & 0 deletions pkg/sql/sem/tree/show.go
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,7 @@ func (node *ShowFingerprints) Format(ctx *FmtCtx) {
type ShowTableStats struct {
// Table is the unresolved name of the table whose statistics are shown.
Table *UnresolvedObjectName
// UsingJSON is set by the SHOW STATISTICS USING JSON form of the statement.
UsingJSON bool
// Options holds the key/value options parsed from the optional WITH clause
// (for example FORECAST). Format emits " WITH ..." only when it is non-empty.
Options KVOptions
}

// Format implements the NodeFormatter interface.
Expand All @@ -737,6 +738,10 @@ func (node *ShowTableStats) Format(ctx *FmtCtx) {
}
ctx.WriteString("FOR TABLE ")
ctx.FormatNode(node.Table)
if len(node.Options) > 0 {
ctx.WriteString(" WITH ")
ctx.FormatNode(&node.Options)
}
}

// ShowHistogram represents a SHOW HISTOGRAM statement.
Expand Down
108 changes: 106 additions & 2 deletions pkg/sql/show_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/errors"
)

Expand All @@ -41,9 +42,24 @@ var showTableStatsJSONColumns = colinfo.ResultColumns{
{Name: "statistics", Typ: types.Jsonb},
}

// showTableStatsOptForecast is the name of the SHOW STATISTICS option
// (WITH FORECAST) that makes the statement also compute and display
// forecasted statistics alongside the stored ones.
const showTableStatsOptForecast = "forecast"

// showTableStatsOptValidate maps each option accepted by SHOW STATISTICS to
// the validation rule applied to it. It is handed to TypeAsStringOpts when
// planning the statement.
var showTableStatsOptValidate = map[string]KVStringOptValidate{
// NOTE(review): presumably rejects "forecast = <value>" — confirm against
// the definition of KVStringOptRequireNoValue.
showTableStatsOptForecast: KVStringOptRequireNoValue,
}

// ShowTableStats returns a SHOW STATISTICS statement for the specified table.
// Privileges: Any privilege on table.
func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (planNode, error) {
optsFn, err := p.TypeAsStringOpts(ctx, n.Options, showTableStatsOptValidate)
if err != nil {
return nil, err
}
opts, err := optsFn()
if err != nil {
return nil, err
}

// We avoid the cache so that we can observe the stats without
// taking a lease, like other SHOW commands.
desc, err := p.ResolveUncachedTableDescriptorEx(ctx, n.Table, true /*required*/, tree.ResolveRequireTableDesc)
Expand All @@ -68,7 +84,9 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p
// "handle" which can be used with SHOW HISTOGRAM.
// TODO(yuzefovich): refactor the code to use the iterator API
// (currently it is not possible due to a panic-catcher below).
const stmt = `SELECT "statisticID",
const stmt = `SELECT
"tableID",
"statisticID",
name,
"columnIDs",
"createdAt",
Expand All @@ -92,7 +110,8 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p
}

const (
statIDIdx = iota
tableIDIdx = iota
statIDIdx
nameIdx
columnIDsIdx
createdAtIdx
Expand Down Expand Up @@ -121,6 +140,57 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p
}
}()

if _, withForecast := opts[showTableStatsOptForecast]; withForecast {
observed := make([]*stats.TableStatistic, 0, len(rows))
for _, row := range rows {
// Skip stats on dropped columns.
colIDs := row[columnIDsIdx].(*tree.DArray).Array
ignoreStatsRowWithDroppedColumn := false
for _, colID := range colIDs {
cid := descpb.ColumnID(*colID.(*tree.DInt))
if _, err := desc.FindColumnWithID(cid); err != nil {
if sqlerrors.IsUndefinedColumnError(err) {
ignoreStatsRowWithDroppedColumn = true
break
} else {
return nil, err
}
}
}
if ignoreStatsRowWithDroppedColumn {
continue
}
stat, err := stats.NewTableStatisticProto(row)
if err != nil {
return nil, err
}
obs := &stats.TableStatistic{TableStatisticProto: *stat}
if obs.HistogramData != nil && !obs.HistogramData.ColumnType.UserDefined() {
if err := stats.DecodeHistogramBuckets(obs); err != nil {
return nil, err
}
}
observed = append(observed, obs)
}

// Reverse the list to sort by CreatedAt descending.
for i := 0; i < len(observed)/2; i++ {
j := len(observed) - i - 1
observed[i], observed[j] = observed[j], observed[i]
}

forecasts := stats.ForecastTableStatistics(ctx, p.EvalContext(), observed)

// Iterate in reverse order to match the ORDER BY "columnIDs".
for i := len(forecasts) - 1; i >= 0; i-- {
forecastRow, err := tableStatisticProtoToRow(&forecasts[i].TableStatisticProto)
if err != nil {
return nil, err
}
rows = append(rows, forecastRow)
}
}

v := p.newContainerValuesNode(columns, 0)
if n.UsingJSON {
result := make([]stats.JSONStatistic, 0, len(rows))
Expand Down Expand Up @@ -228,3 +298,37 @@ func statColumnString(desc catalog.TableDescriptor, colID tree.Datum) (colName s
}
return colDesc.GetName(), nil
}

// tableStatisticProtoToRow encodes a TableStatisticProto as a row of datums so
// that it can be appended to the rows SHOW STATISTICS has already collected.
// The datums are produced in this order: table ID, statistic ID, name, column
// IDs, creation time, row count, distinct count, null count, average size and
// histogram. An empty name and a missing histogram are both encoded as NULL;
// a present histogram is stored as its marshaled protobuf bytes.
//
// An error is returned if a column ID cannot be appended to the array datum
// or if the histogram cannot be marshaled.
func tableStatisticProtoToRow(stat *stats.TableStatisticProto) (tree.Datums, error) {
	// Statistic name: NULL unless a non-empty name is set.
	nameDatum := tree.DNull
	if len(stat.Name) > 0 {
		nameDatum = tree.NewDString(stat.Name)
	}

	// Column IDs become an INT array datum.
	colIDsDatum := tree.NewDArray(types.Int)
	for i := range stat.ColumnIDs {
		id := tree.NewDInt(tree.DInt(stat.ColumnIDs[i]))
		if err := colIDsDatum.Append(id); err != nil {
			return nil, err
		}
	}

	// Histogram: NULL when absent, otherwise the serialized proto.
	histogramDatum := tree.DNull
	if stat.HistogramData != nil {
		encoded, err := protoutil.Marshal(stat.HistogramData)
		if err != nil {
			return nil, err
		}
		histogramDatum = tree.NewDBytes(tree.DBytes(encoded))
	}

	return tree.Datums{
		tree.NewDInt(tree.DInt(stat.TableID)),
		tree.NewDInt(tree.DInt(stat.StatisticID)),
		nameDatum,
		colIDsDatum,
		&tree.DTimestamp{Time: stat.CreatedAt},
		tree.NewDInt(tree.DInt(stat.RowCount)),
		tree.NewDInt(tree.DInt(stat.DistinctCount)),
		tree.NewDInt(tree.DInt(stat.NullCount)),
		tree.NewDInt(tree.DInt(stat.AvgSize)),
		histogramDatum,
	}, nil
}
43 changes: 27 additions & 16 deletions pkg/sql/stats/stats_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,11 +487,10 @@ const (
statsLen
)

// parseStats converts the given datums to a TableStatistic object. It might
// need to run a query to get user defined type metadata.
func (sc *TableStatisticsCache) parseStats(
ctx context.Context, datums tree.Datums,
) (*TableStatistic, error) {
// NewTableStatisticProto converts a row of datums from system.table_statistics
// into a TableStatisticProto. Note that any user-defined types in the
// HistogramData will be unresolved.
func NewTableStatisticProto(datums tree.Datums) (*TableStatisticProto, error) {
if datums == nil || datums.Len() == 0 {
return nil, nil
}
Expand Down Expand Up @@ -528,16 +527,14 @@ func (sc *TableStatisticsCache) parseStats(
}

// Extract datum values.
res := &TableStatistic{
TableStatisticProto: TableStatisticProto{
TableID: descpb.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
AvgSize: (uint64)(*datums[avgSizeIndex].(*tree.DInt)),
},
res := &TableStatisticProto{
TableID: descpb.ID((int32)(*datums[tableIDIndex].(*tree.DInt))),
StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)),
CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time,
RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)),
DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)),
NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)),
AvgSize: (uint64)(*datums[avgSizeIndex].(*tree.DInt)),
}
columnIDs := datums[columnIDsIndex].(*tree.DArray)
res.ColumnIDs = make([]descpb.ColumnID, len(columnIDs.Array))
Expand All @@ -555,7 +552,21 @@ func (sc *TableStatisticsCache) parseStats(
); err != nil {
return nil, err
}
}
return res, nil
}

// parseStats converts the given datums to a TableStatistic object. It might
// need to run a query to get user defined type metadata.
func (sc *TableStatisticsCache) parseStats(
ctx context.Context, datums tree.Datums,
) (*TableStatistic, error) {
tsp, err := NewTableStatisticProto(datums)
if err != nil {
return nil, err
}
res := &TableStatistic{TableStatisticProto: *tsp}
if res.HistogramData != nil {
// Hydrate the type in case any user defined types are present.
// There are cases where typ is nil, so don't do anything if so.
if typ := res.HistogramData.ColumnType; typ != nil && typ.UserDefined() {
Expand Down Expand Up @@ -671,7 +682,7 @@ func (sc *TableStatisticsCache) getTableStatsFromDB(
) ([]*TableStatistic, error) {
const getTableStatisticsStmt = `
SELECT
"tableID",
"tableID",
"statisticID",
name,
"columnIDs",
Expand Down

0 comments on commit f86a743

Please sign in to comment.