diff --git a/docs/generated/sql/bnf/show_stats.bnf b/docs/generated/sql/bnf/show_stats.bnf index a764b190910d..e05f722c343a 100644 --- a/docs/generated/sql/bnf/show_stats.bnf +++ b/docs/generated/sql/bnf/show_stats.bnf @@ -1,2 +1,2 @@ show_stats_stmt ::= - 'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name + 'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name opt_with_options diff --git a/docs/generated/sql/bnf/stmt_block.bnf b/docs/generated/sql/bnf/stmt_block.bnf index 4761f1af5968..e8a856de290f 100644 --- a/docs/generated/sql/bnf/stmt_block.bnf +++ b/docs/generated/sql/bnf/stmt_block.bnf @@ -877,7 +877,7 @@ show_sessions_stmt ::= | 'SHOW' 'ALL' opt_cluster 'SESSIONS' show_stats_stmt ::= - 'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name + 'SHOW' 'STATISTICS' 'FOR' 'TABLE' table_name opt_with_options show_tables_stmt ::= 'SHOW' 'TABLES' 'FROM' name '.' name with_comment diff --git a/pkg/sql/alter_table.go b/pkg/sql/alter_table.go index f7f6d8e7212d..812c5184b76a 100644 --- a/pkg/sql/alter_table.go +++ b/pkg/sql/alter_table.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/jobs" + "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/security/username" @@ -1279,6 +1280,17 @@ func injectTableStats( return err } + // Check that we're not injecting any forecasted stats. + for i := range jsonStats { + if jsonStats[i].Name == jobspb.ForecastStatsName { + return errors.WithHintf( + pgerror.New(pgcode.InvalidName, "cannot inject forecasted statistics"), + "either remove forecasts from the statement, or rename them from %q to something else", + jobspb.ForecastStatsName, + ) + } + } + // First, delete all statistics for the table. 
if _ /* rows */, err := params.extendedEvalCtx.ExecCfg.InternalExecutor.Exec( params.ctx, diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 4a7299e51faf..93ffa95720eb 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -616,6 +616,64 @@ s4 {b,c} s4 {c,d} s8 {a} +# Try forecasting stats. +query TT colnames +SELECT statistics_name, column_names +FROM [SHOW STATISTICS FOR TABLE data WITH FORECAST] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names +__auto__ {a,b,c,d} +__auto__ {a,b,c,d} +__auto__ {a,b,c,d} +__auto__ {a,b,c,d} +__auto__ {a,b,c,d} +__auto__ {a,b,c} +__auto__ {a,b,c} +__auto__ {a,b,c} +__auto__ {a,b,c} +__auto__ {a,b,c} +__auto__ {a,b} +__auto__ {a,b} +__auto__ {a,b} +__auto__ {a,b} +__auto__ {a,b} +__auto__ {a} +__auto__ {a} +__auto__ {a} +__auto__ {a} +__auto__ {b} +__auto__ {b} +__auto__ {b} +__auto__ {b} +__auto__ {b} +__auto__ {c} +__auto__ {c} +__auto__ {c} +__auto__ {c} +__auto__ {c} +__auto__ {d} +__auto__ {d} +__auto__ {d} +__auto__ {d} +__auto__ {d} +__auto__ {e} +__auto__ {e} +__auto__ {e} +__auto__ {e} +__auto__ {e} +__forecast__ {a,b,c,d} +__forecast__ {a,b,c} +__forecast__ {a,b} +__forecast__ {a} +__forecast__ {b} +__forecast__ {c} +__forecast__ {d} +__forecast__ {e} +s4 {b,c} +s4 {c,d} +s8 {a} + # Test deletion of old non-default stats. statement ok diff --git a/pkg/sql/parser/sql.y b/pkg/sql/parser/sql.y index b47371f950ae..63f67224a73f 100644 --- a/pkg/sql/parser/sql.y +++ b/pkg/sql/parser/sql.y @@ -6473,17 +6473,26 @@ session_var_parts: // are encoded in JSON format. 
// %SeeAlso: SHOW HISTOGRAM show_stats_stmt: - SHOW STATISTICS FOR TABLE table_name + SHOW STATISTICS FOR TABLE table_name opt_with_options { - $$.val = &tree.ShowTableStats{Table: $5.unresolvedObjectName()} + $$.val = &tree.ShowTableStats{ + Table: $5.unresolvedObjectName(), + Options: $6.kvOptions(), + } } -| SHOW STATISTICS USING JSON FOR TABLE table_name +| SHOW STATISTICS USING JSON FOR TABLE table_name opt_with_options { /* SKIP DOC */ - $$.val = &tree.ShowTableStats{Table: $7.unresolvedObjectName(), UsingJSON: true} + $$.val = &tree.ShowTableStats{ + Table: $7.unresolvedObjectName(), + UsingJSON: true, + Options: $8.kvOptions(), + } } | SHOW STATISTICS error // SHOW HELP: SHOW STATISTICS + + // %Help: SHOW HISTOGRAM - display histogram (experimental) // %Category: Experimental // %Text: SHOW HISTOGRAM diff --git a/pkg/sql/sem/tree/show.go b/pkg/sql/sem/tree/show.go index 4d6e1de8a1d2..c791b25368f5 100644 --- a/pkg/sql/sem/tree/show.go +++ b/pkg/sql/sem/tree/show.go @@ -727,6 +727,7 @@ func (node *ShowFingerprints) Format(ctx *FmtCtx) { type ShowTableStats struct { Table *UnresolvedObjectName UsingJSON bool + Options KVOptions } // Format implements the NodeFormatter interface. @@ -737,6 +738,10 @@ func (node *ShowTableStats) Format(ctx *FmtCtx) { } ctx.WriteString("FOR TABLE ") ctx.FormatNode(node.Table) + if len(node.Options) > 0 { + ctx.WriteString(" WITH ") + ctx.FormatNode(&node.Options) + } } // ShowHistogram represents a SHOW HISTOGRAM statement. 
diff --git a/pkg/sql/show_stats.go b/pkg/sql/show_stats.go index d7f038657147..d683b246b6b7 100644 --- a/pkg/sql/show_stats.go +++ b/pkg/sql/show_stats.go @@ -23,6 +23,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/errorutil" "github.com/cockroachdb/cockroach/pkg/util/json" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/errors" ) @@ -41,9 +42,24 @@ var showTableStatsJSONColumns = colinfo.ResultColumns{ {Name: "statistics", Typ: types.Jsonb}, } +const showTableStatsOptForecast = "forecast" + +var showTableStatsOptValidate = map[string]KVStringOptValidate{ + showTableStatsOptForecast: KVStringOptRequireNoValue, +} + // ShowTableStats returns a SHOW STATISTICS statement for the specified table. // Privileges: Any privilege on table. func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (planNode, error) { + optsFn, err := p.TypeAsStringOpts(ctx, n.Options, showTableStatsOptValidate) + if err != nil { + return nil, err + } + opts, err := optsFn() + if err != nil { + return nil, err + } + // We avoid the cache so that we can observe the stats without // taking a lease, like other SHOW commands. desc, err := p.ResolveUncachedTableDescriptorEx(ctx, n.Table, true /*required*/, tree.ResolveRequireTableDesc) @@ -68,7 +84,9 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p // "handle" which can be used with SHOW HISTOGRAM. // TODO(yuzefovich): refactor the code to use the iterator API // (currently it is not possible due to a panic-catcher below). 
- const stmt = `SELECT "statisticID", + const stmt = `SELECT + "tableID", + "statisticID", name, "columnIDs", "createdAt", @@ -92,7 +110,8 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p } const ( - statIDIdx = iota + tableIDIdx = iota + statIDIdx nameIdx columnIDsIdx createdAtIdx @@ -121,6 +140,57 @@ func (p *planner) ShowTableStats(ctx context.Context, n *tree.ShowTableStats) (p } }() + if _, withForecast := opts[showTableStatsOptForecast]; withForecast { + observed := make([]*stats.TableStatistic, 0, len(rows)) + for _, row := range rows { + // Skip stats on dropped columns. + colIDs := row[columnIDsIdx].(*tree.DArray).Array + ignoreStatsRowWithDroppedColumn := false + for _, colID := range colIDs { + cid := descpb.ColumnID(*colID.(*tree.DInt)) + if _, err := desc.FindColumnWithID(cid); err != nil { + if sqlerrors.IsUndefinedColumnError(err) { + ignoreStatsRowWithDroppedColumn = true + break + } else { + return nil, err + } + } + } + if ignoreStatsRowWithDroppedColumn { + continue + } + stat, err := stats.NewTableStatisticProto(row) + if err != nil { + return nil, err + } + obs := &stats.TableStatistic{TableStatisticProto: *stat} + if obs.HistogramData != nil && !obs.HistogramData.ColumnType.UserDefined() { + if err := stats.DecodeHistogramBuckets(obs); err != nil { + return nil, err + } + } + observed = append(observed, obs) + } + + // Reverse the list to sort by CreatedAt descending. + for i := 0; i < len(observed)/2; i++ { + j := len(observed) - i - 1 + observed[i], observed[j] = observed[j], observed[i] + } + + forecasts := stats.ForecastTableStatistics(ctx, p.EvalContext(), observed) + + // Iterate in reverse order to match the ORDER BY "columnIDs". 
+ for i := len(forecasts) - 1; i >= 0; i-- { + forecastRow, err := tableStatisticProtoToRow(&forecasts[i].TableStatisticProto) + if err != nil { + return nil, err + } + rows = append(rows, forecastRow) + } + } + v := p.newContainerValuesNode(columns, 0) if n.UsingJSON { result := make([]stats.JSONStatistic, 0, len(rows)) @@ -228,3 +298,37 @@ func statColumnString(desc catalog.TableDescriptor, colID tree.Datum) (colName s } return colDesc.GetName(), nil } + +func tableStatisticProtoToRow(stat *stats.TableStatisticProto) (tree.Datums, error) { + name := tree.DNull + if stat.Name != "" { + name = tree.NewDString(stat.Name) + } + columnIDs := tree.NewDArray(types.Int) + for _, c := range stat.ColumnIDs { + if err := columnIDs.Append(tree.NewDInt(tree.DInt(c))); err != nil { + return nil, err + } + } + row := tree.Datums{ + tree.NewDInt(tree.DInt(stat.TableID)), + tree.NewDInt(tree.DInt(stat.StatisticID)), + name, + columnIDs, + &tree.DTimestamp{Time: stat.CreatedAt}, + tree.NewDInt(tree.DInt(stat.RowCount)), + tree.NewDInt(tree.DInt(stat.DistinctCount)), + tree.NewDInt(tree.DInt(stat.NullCount)), + tree.NewDInt(tree.DInt(stat.AvgSize)), + } + if stat.HistogramData == nil { + row = append(row, tree.DNull) + } else { + histogram, err := protoutil.Marshal(stat.HistogramData) + if err != nil { + return nil, err + } + row = append(row, tree.NewDBytes(tree.DBytes(histogram))) + } + return row, nil +} diff --git a/pkg/sql/stats/stats_cache.go b/pkg/sql/stats/stats_cache.go index b3e7c13502cf..1e9e1cc252d4 100644 --- a/pkg/sql/stats/stats_cache.go +++ b/pkg/sql/stats/stats_cache.go @@ -487,11 +487,10 @@ const ( statsLen ) -// parseStats converts the given datums to a TableStatistic object. It might -// need to run a query to get user defined type metadata. 
-func (sc *TableStatisticsCache) parseStats( - ctx context.Context, datums tree.Datums, -) (*TableStatistic, error) { +// NewTableStatisticProto converts a row of datums from system.table_statistics +// into a TableStatisticProto. Note that any user-defined types in the +// HistogramData will be unresolved. +func NewTableStatisticProto(datums tree.Datums) (*TableStatisticProto, error) { if datums == nil || datums.Len() == 0 { return nil, nil } @@ -528,16 +527,14 @@ func (sc *TableStatisticsCache) parseStats( } // Extract datum values. - res := &TableStatistic{ - TableStatisticProto: TableStatisticProto{ - TableID: descpb.ID((int32)(*datums[tableIDIndex].(*tree.DInt))), - StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)), - CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time, - RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)), - DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)), - NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)), - AvgSize: (uint64)(*datums[avgSizeIndex].(*tree.DInt)), - }, + res := &TableStatisticProto{ + TableID: descpb.ID((int32)(*datums[tableIDIndex].(*tree.DInt))), + StatisticID: (uint64)(*datums[statisticsIDIndex].(*tree.DInt)), + CreatedAt: datums[createdAtIndex].(*tree.DTimestamp).Time, + RowCount: (uint64)(*datums[rowCountIndex].(*tree.DInt)), + DistinctCount: (uint64)(*datums[distinctCountIndex].(*tree.DInt)), + NullCount: (uint64)(*datums[nullCountIndex].(*tree.DInt)), + AvgSize: (uint64)(*datums[avgSizeIndex].(*tree.DInt)), } columnIDs := datums[columnIDsIndex].(*tree.DArray) res.ColumnIDs = make([]descpb.ColumnID, len(columnIDs.Array)) @@ -555,7 +552,21 @@ func (sc *TableStatisticsCache) parseStats( ); err != nil { return nil, err } + } + return res, nil +} +// parseStats converts the given datums to a TableStatistic object. It might +// need to run a query to get user defined type metadata. 
+func (sc *TableStatisticsCache) parseStats( + ctx context.Context, datums tree.Datums, +) (*TableStatistic, error) { + tsp, err := NewTableStatisticProto(datums) + if err != nil { + return nil, err + } + res := &TableStatistic{TableStatisticProto: *tsp} + if res.HistogramData != nil { // Hydrate the type in case any user defined types are present. // There are cases where typ is nil, so don't do anything if so. if typ := res.HistogramData.ColumnType; typ != nil && typ.UserDefined() { @@ -671,7 +682,7 @@ func (sc *TableStatisticsCache) getTableStatsFromDB( ) ([]*TableStatistic, error) { const getTableStatisticsStmt = ` SELECT - "tableID", + "tableID", "statisticID", name, "columnIDs",