Skip to content

Commit

Permalink
Merge pull request #82289 from michae2/backport22.1-81793
Browse files Browse the repository at this point in the history
release-22.1: sql/stats: store non-NULL histograms for empty tables
  • Loading branch information
michae2 authored Jun 1, 2022
2 parents e8a6bed + c23e622 commit 97d0c6e
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 3 deletions.
194 changes: 194 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/distsql_stats
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,7 @@ FROM [SHOW STATISTICS USING JSON FOR TABLE all_null]
],
"distinct_count": 1,
"histo_col_type": "INT8",
"histo_version": 1,
"name": "s",
"null_count": 1,
"row_count": 1
Expand Down Expand Up @@ -1348,3 +1349,196 @@ CREATE TABLE t76867 (

statement ok
ANALYZE t76867

# Collecting stats on empty tables should result in empty (but not NULL)
# histograms.
statement ok
CREATE TABLE tabula (r INT, a INT, sa INT, PRIMARY KEY (r), INDEX (a, sa))

statement ok
CREATE STATISTICS aristotle FROM tabula

query TTIB colnames
SELECT statistics_name, column_names, row_count, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE tabula]
ORDER BY statistics_name, column_names::STRING
----
statistics_name column_names row_count has_histogram
aristotle {a,sa} 0 false
aristotle {a} 0 true
aristotle {r} 0 true
aristotle {sa} 0 true

let $hist_id_1
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula]
WHERE statistics_name = 'aristotle' AND column_names = '{a}'

# This histogram should be empty.
query TIRI colnames
SHOW HISTOGRAM $hist_id_1
----
upper_bound range_rows distinct_range_rows equal_rows

query T
SELECT jsonb_pretty(COALESCE(json_agg(stat), '[]'))
FROM (SELECT json_array_elements(statistics) - 'created_at' AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE tabula])
----
[
{
"avg_size": 0,
"columns": [
"r"
],
"distinct_count": 0,
"histo_col_type": "INT8",
"histo_version": 1,
"name": "aristotle",
"null_count": 0,
"row_count": 0
},
{
"avg_size": 0,
"columns": [
"a"
],
"distinct_count": 0,
"histo_col_type": "INT8",
"histo_version": 1,
"name": "aristotle",
"null_count": 0,
"row_count": 0
},
{
"avg_size": 0,
"columns": [
"sa"
],
"distinct_count": 0,
"histo_col_type": "INT8",
"histo_version": 1,
"name": "aristotle",
"null_count": 0,
"row_count": 0
},
{
"avg_size": 0,
"columns": [
"a",
"sa"
],
"distinct_count": 0,
"histo_col_type": "",
"name": "aristotle",
"null_count": 0,
"row_count": 0
}
]

# Collecting stats on columns with all NULL values should also result in empty
# (but not NULL) histograms.
statement ok
INSERT INTO tabula VALUES (11, 12, NULL)

statement ok
CREATE STATISTICS locke FROM tabula

query TTIIB colnames
SELECT statistics_name, column_names, row_count, null_count, histogram_id IS NOT NULL AS has_histogram
FROM [SHOW STATISTICS FOR TABLE tabula]
ORDER BY statistics_name, column_names::STRING
----
statistics_name column_names row_count null_count has_histogram
locke {a,sa} 1 0 false
locke {a} 1 0 true
locke {r} 1 0 true
locke {sa} 1 1 true

let $hist_id_1
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula]
WHERE statistics_name = 'locke' AND column_names = '{a}'

# This histogram should *not* be empty.
query TIRI colnames
SHOW HISTOGRAM $hist_id_1
----
upper_bound range_rows distinct_range_rows equal_rows
12 0 0 1

let $hist_id_1
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE tabula]
WHERE statistics_name = 'locke' AND column_names = '{sa}'

# This histogram *should* be empty.
query TIRI colnames
SHOW HISTOGRAM $hist_id_1
----
upper_bound range_rows distinct_range_rows equal_rows

query T
SELECT jsonb_pretty(COALESCE(json_agg(stat), '[]'))
FROM (SELECT json_array_elements(statistics) - 'created_at' - 'avg_size' AS stat
FROM [SHOW STATISTICS USING JSON FOR TABLE tabula])
----
[
{
"columns": [
"r"
],
"distinct_count": 1,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "11"
}
],
"histo_col_type": "INT8",
"histo_version": 1,
"name": "locke",
"null_count": 0,
"row_count": 1
},
{
"columns": [
"a"
],
"distinct_count": 1,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "12"
}
],
"histo_col_type": "INT8",
"histo_version": 1,
"name": "locke",
"null_count": 0,
"row_count": 1
},
{
"columns": [
"sa"
],
"distinct_count": 1,
"histo_col_type": "INT8",
"histo_version": 1,
"name": "locke",
"null_count": 1,
"row_count": 1
},
{
"columns": [
"a",
"sa"
],
"distinct_count": 1,
"histo_col_type": "",
"name": "locke",
"null_count": 0,
"row_count": 1
}
]
2 changes: 1 addition & 1 deletion pkg/sql/rowexec/sample_aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ func (s *sampleAggregator) writeResults(ctx context.Context) error {
if err := s.FlowCtx.Cfg.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
for _, si := range s.sketches {
var histogram *stats.HistogramData
if si.spec.GenerateHistogram && len(s.sr.Get()) != 0 {
if si.spec.GenerateHistogram {
colIdx := int(si.spec.Columns[0])
typ := s.inTypes[colIdx]

Expand Down
4 changes: 3 additions & 1 deletion pkg/sql/stats/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ func EquiDepthHistogram(
) (HistogramData, []cat.HistogramBucket, error) {
numSamples := len(samples)
if numSamples == 0 {
return HistogramData{ColumnType: colType}, nil, nil
return HistogramData{
ColumnType: colType, Buckets: make([]HistogramData_Bucket, 0), Version: histVersion,
}, nil, nil
}
if maxBuckets < 2 {
return HistogramData{}, nil, errors.Errorf("histogram requires at least two buckets")
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/stats/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,12 @@ func TestEquiDepthHistogram(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if h.Version != histVersion {
t.Errorf("Invalid histogram version %d expected %d", h.Version, histVersion)
}
if (h.Buckets == nil) != (tc.buckets == nil) {
t.Fatalf("Invalid bucket == nil: %v, expected %v", h.Buckets == nil, tc.buckets == nil)
}
if len(h.Buckets) != len(tc.buckets) {
t.Fatalf("Invalid number of buckets %d, expected %d", len(h.Buckets), len(tc.buckets))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/stats/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (js *JSONStatistic) DecodeAndSetHistogram(
func (js *JSONStatistic) GetHistogram(
semaCtx *tree.SemaContext, evalCtx *tree.EvalContext,
) (*HistogramData, error) {
if len(js.HistogramBuckets) == 0 {
if js.HistogramColumnType == "" {
return nil, nil
}
h := &HistogramData{}
Expand Down

0 comments on commit 97d0c6e

Please sign in to comment.