diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index db0f790d3039..97ed18c71602 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -1219,6 +1219,7 @@ FROM [SHOW STATISTICS USING JSON FOR TABLE all_null] ], "distinct_count": 1, "histo_col_type": "INT8", + "histo_version": 1, "name": "s", "null_count": 1, "row_count": 1 @@ -1375,3 +1376,200 @@ ANALYZE system.jobs # Collecting stats on system.scheduled_jobs is disallowed. statement error pq: cannot create statistics on system.scheduled_jobs ANALYZE system.scheduled_jobs + +# Collecting stats on empty tables should result in empty (but not NULL) +# histograms. +statement ok +CREATE TABLE tabula (r INT, a INT, sa INT, PRIMARY KEY (r), INDEX (a, sa)) + +statement ok +CREATE STATISTICS aristotle FROM tabula + +query TTIB colnames +SELECT statistics_name, column_names, row_count, histogram_id IS NOT NULL AS has_histogram +FROM [SHOW STATISTICS FOR TABLE tabula] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names row_count has_histogram +aristotle {a,sa} 0 false +aristotle {a} 0 true +aristotle {r} 0 true +aristotle {sa} 0 true + +let $hist_id_1 +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula] +WHERE statistics_name = 'aristotle' AND column_names = '{a}' + +# This histogram should be empty. +query TIRI colnames +SHOW HISTOGRAM $hist_id_1 +---- +upper_bound range_rows distinct_range_rows equal_rows + +query T +SELECT jsonb_pretty(COALESCE(json_agg(stat), '[]')) + FROM (SELECT json_array_elements(statistics) - 'created_at' AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE tabula]) +---- +[ + { + "avg_size": 0, + "columns": [ + "r" + ], + "distinct_count": 0, + "histo_col_type": "INT8", + "histo_version": 1, + "name": "aristotle", + "null_count": 0, + "row_count": 0 + }, + { + "avg_size": 0, + "columns": [ + "a" + ], + "distinct_count": 0, + "histo_col_type": "INT8", + "histo_version": 1, + "name": "aristotle", + "null_count": 0, + "row_count": 0 + }, + { + "avg_size": 0, + "columns": [ + "sa" + ], + "distinct_count": 0, + "histo_col_type": "INT8", + "histo_version": 1, + "name": "aristotle", + "null_count": 0, + "row_count": 0 + }, + { + "avg_size": 0, + "columns": [ + "a", + "sa" + ], + "distinct_count": 0, + "histo_col_type": "", + "name": "aristotle", + "null_count": 0, + "row_count": 0 + } +] + +# Collecting stats on columns with all NULL values should also result in empty +# (but not NULL) histograms. +statement ok +INSERT INTO tabula VALUES (11, 12, NULL) + +statement ok +CREATE STATISTICS locke FROM tabula + +query TTIIB colnames +SELECT statistics_name, column_names, row_count, null_count, histogram_id IS NOT NULL AS has_histogram +FROM [SHOW STATISTICS FOR TABLE tabula] +ORDER BY statistics_name, column_names::STRING +---- +statistics_name column_names row_count null_count has_histogram +locke {a,sa} 1 0 false +locke {a} 1 0 true +locke {r} 1 0 true +locke {sa} 1 1 true + +let $hist_id_1 +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula] +WHERE statistics_name = 'locke' AND column_names = '{a}' + +# This histogram should *not* be empty. +query TIRI colnames +SHOW HISTOGRAM $hist_id_1 +---- +upper_bound range_rows distinct_range_rows equal_rows +12 0 0 1 + +let $hist_id_1 +SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula] +WHERE statistics_name = 'locke' AND column_names = '{sa}' + +# This histogram *should* be empty. +query TIRI colnames +SHOW HISTOGRAM $hist_id_1 +---- +upper_bound range_rows distinct_range_rows equal_rows + +query T +SELECT jsonb_pretty(COALESCE(json_agg(stat), '[]')) + FROM (SELECT json_array_elements(statistics) - 'created_at' AS stat + FROM [SHOW STATISTICS USING JSON FOR TABLE tabula]) +---- +[ + { + "avg_size": 1, + "columns": [ + "r" + ], + "distinct_count": 1, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "11" + } + ], + "histo_col_type": "INT8", + "histo_version": 1, + "name": "locke", + "null_count": 0, + "row_count": 1 + }, + { + "avg_size": 2, + "columns": [ + "a" + ], + "distinct_count": 1, + "histo_buckets": [ + { + "distinct_range": 0, + "num_eq": 1, + "num_range": 0, + "upper_bound": "12" + } + ], + "histo_col_type": "INT8", + "histo_version": 1, + "name": "locke", + "null_count": 0, + "row_count": 1 + }, + { + "avg_size": 0, + "columns": [ + "sa" + ], + "distinct_count": 1, + "histo_col_type": "INT8", + "histo_version": 1, + "name": "locke", + "null_count": 1, + "row_count": 1 + }, + { + "avg_size": 2, + "columns": [ + "a", + "sa" + ], + "distinct_count": 1, + "histo_col_type": "", + "name": "locke", + "null_count": 0, + "row_count": 1 + } +] diff --git a/pkg/sql/rowexec/sample_aggregator.go b/pkg/sql/rowexec/sample_aggregator.go index 2a69dd0cd530..035b4d03da16 100644 --- a/pkg/sql/rowexec/sample_aggregator.go +++ b/pkg/sql/rowexec/sample_aggregator.go @@ -436,7 +436,7 @@ func (s *sampleAggregator) writeResults(ctx context.Context) error { if err := s.FlowCtx.Cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { for _, si := range s.sketches { var histogram *stats.HistogramData - if si.spec.GenerateHistogram && len(s.sr.Get()) != 0 { + if si.spec.GenerateHistogram { colIdx := int(si.spec.Columns[0]) typ := s.inTypes[colIdx] diff --git a/pkg/sql/stats/histogram.go b/pkg/sql/stats/histogram.go index 0da70ed8a36b..e5156c00f7ca 100644 --- a/pkg/sql/stats/histogram.go +++ b/pkg/sql/stats/histogram.go @@ -87,7 +87,9 @@ func EquiDepthHistogram( ) (HistogramData, []cat.HistogramBucket, error) { numSamples := len(samples) if numSamples == 0 { - return HistogramData{ColumnType: colType}, nil, nil + return HistogramData{ + ColumnType: colType, Buckets: make([]HistogramData_Bucket, 0), Version: histVersion, + }, nil, nil } if maxBuckets < 2 { return HistogramData{}, nil, errors.Errorf("histogram requires at least two buckets") diff --git a/pkg/sql/stats/histogram_test.go b/pkg/sql/stats/histogram_test.go index 186639846560..c40ec5f8551b 100644 --- a/pkg/sql/stats/histogram_test.go +++ b/pkg/sql/stats/histogram_test.go @@ -245,6 +245,12 @@ func TestEquiDepthHistogram(t *testing.T) { if err != nil { t.Fatal(err) } + if h.Version != histVersion { + t.Errorf("Invalid histogram version %d expected %d", h.Version, histVersion) + } + if (h.Buckets == nil) != (tc.buckets == nil) { + t.Fatalf("Invalid bucket == nil: %v, expected %v", h.Buckets == nil, tc.buckets == nil) + } if len(h.Buckets) != len(tc.buckets) { t.Fatalf("Invalid number of buckets %d, expected %d", len(h.Buckets), len(tc.buckets)) } diff --git a/pkg/sql/stats/json.go b/pkg/sql/stats/json.go index c1bd214802e7..4ae382b505ee 100644 --- a/pkg/sql/stats/json.go +++ b/pkg/sql/stats/json.go @@ -130,7 +130,7 @@ func (js *JSONStatistic) DecodeAndSetHistogram( func (js *JSONStatistic) GetHistogram( semaCtx *tree.SemaContext, evalCtx *eval.Context, ) (*HistogramData, error) { - if len(js.HistogramBuckets) == 0 { + if js.HistogramColumnType == "" { return nil, nil } h := &HistogramData{}